From 2404899e2872c017559d703def091e035c7bac4a Mon Sep 17 00:00:00 2001
From: Idan <ghsijns@gmail.com>
Date: Fri, 23 Jun 2023 14:56:14 +0300
Subject: [PATCH 1/3] Fixed request length bug, changed to a less used port

---
 application/.env_sample                      |  2 +-
 application/Dockerfile                       |  4 +-
 application/app.py                           | 22 ++++---
 application/core/settings.py                 |  2 +-
 application/wsgi.py                          |  2 +-
 docker-compose-azure.yaml                    |  6 +-
 docker-compose.yaml                          | 18 +++---
 extensions/chrome/popup.js                   |  2 +-
 extensions/discord/bot.py                    |  2 +-
 extensions/web-widget/src/js/script.js       |  2 +-
 frontend/.env.development                    |  2 +-
 frontend/src/conversation/conversationApi.ts | 68 ++++++++++++++++----
 setup.sh                                     |  6 +-
 13 files changed, 90 insertions(+), 48 deletions(-)

diff --git a/application/.env_sample b/application/.env_sample
index a9d5862..af270ee 100644
--- a/application/.env_sample
+++ b/application/.env_sample
@@ -3,7 +3,7 @@ EMBEDDINGS_KEY=your_api_key
 CELERY_BROKER_URL=redis://localhost:6379/0
 CELERY_RESULT_BACKEND=redis://localhost:6379/1
 MONGO_URI=mongodb://localhost:27017/docsgpt
-API_URL=http://localhost:5001
+API_URL=http://localhost:7091
 
 #For OPENAI on Azure
 OPENAI_API_BASE=
diff --git a/application/Dockerfile b/application/Dockerfile
index 3e2b1d6..1285972 100644
--- a/application/Dockerfile
+++ b/application/Dockerfile
@@ -18,6 +18,6 @@ COPY . /app
 ENV FLASK_APP=app.py
 ENV FLASK_DEBUG=true
 
-EXPOSE 5001
+EXPOSE 7091
 
-CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:5001", "wsgi:app"]
+CMD ["gunicorn", "-w", "2", "--timeout", "120", "--bind", "0.0.0.0:7091", "wsgi:app"]
diff --git a/application/app.py b/application/app.py
index 52046cb..7f84030 100644
--- a/application/app.py
+++ b/application/app.py
@@ -43,6 +43,7 @@ from worker import ingest_worker
 # os.environ["LANGCHAIN_HANDLER"] = "langchain"
 
 logger = logging.getLogger(__name__)
+gpt_model = 'gpt-3.5-turbo' # gpt-4
 
 if settings.LLM_NAME == "manifest":
     from manifest import Manifest
@@ -195,7 +196,7 @@ def complete_stream(question, docsearch, chat_history, api_key):
                     messages_combine.append({"role": "user", "content": i["prompt"]})
                     messages_combine.append({"role": "system", "content": i["response"]})
     messages_combine.append({"role": "user", "content": question})
-    completion = openai.ChatCompletion.create(model="gpt-3.5-turbo", engine=settings.AZURE_DEPLOYMENT_NAME,
+    completion = openai.ChatCompletion.create(model=gpt_model, engine=settings.AZURE_DEPLOYMENT_NAME,
                                               messages=messages_combine, stream=True, max_tokens=500, temperature=0)
 
     for line in completion:
@@ -208,26 +209,27 @@ def complete_stream(question, docsearch, chat_history, api_key):
     yield f"data: {data}\n\n"
 
 
-@app.route("/stream", methods=["POST", "GET"])
+@app.route("/stream", methods=["POST"])
 def stream():
+    data = request.get_json()
     # get parameter from url question
-    question = request.args.get("question")
-    history = request.args.get("history")
+    question = data["question"]
+    history = data["history"]
     # history to json object from string
     history = json.loads(history)
 
     # check if active_docs is set
 
     if not api_key_set:
-        api_key = request.args.get("api_key")
+        api_key = data["api_key"]
     else:
         api_key = settings.API_KEY
     if not embeddings_key_set:
-        embeddings_key = request.args.get("embeddings_key")
+        embeddings_key = data["embeddings_key"]
     else:
         embeddings_key = settings.EMBEDDINGS_KEY
-    if "active_docs" in request.args:
-        vectorstore = get_vectorstore({"active_docs": request.args.get("active_docs")})
+    if "active_docs" in data:
+        vectorstore = get_vectorstore({"active_docs": data["active_docs"]})
     else:
         vectorstore = ""
     docsearch = get_docsearch(vectorstore, embeddings_key)
@@ -279,7 +281,7 @@ def api_answer():
                 )
             else:
                 logger.debug("plain OpenAI")
-                llm = ChatOpenAI(openai_api_key=api_key)  # optional parameter: model_name="gpt-4"
+                llm = ChatOpenAI(openai_api_key=api_key, model_name=gpt_model)  # optional parameter: model_name="gpt-4"
             messages_combine = [SystemMessagePromptTemplate.from_template(chat_combine_template)]
             if history:
                 tokens_current_history = 0
@@ -597,4 +599,4 @@ def after_request(response):
 
 
 if __name__ == "__main__":
-    app.run(debug=True, port=5001)
+    app.run(debug=True, port=7091)
diff --git a/application/core/settings.py b/application/core/settings.py
index 853f152..34c8c02 100644
--- a/application/core/settings.py
+++ b/application/core/settings.py
@@ -12,7 +12,7 @@ class Settings(BaseSettings):
     MODEL_PATH: str = "./models/gpt4all-model.bin"
     TOKENS_MAX_HISTORY: int = 150
 
-    API_URL: str = "http://localhost:5001"  # backend url for celery worker
+    API_URL: str = "http://localhost:7091"  # backend url for celery worker
 
     API_KEY: str = None  # LLM api key
     EMBEDDINGS_KEY: str = None  # api key for embeddings (if using openai, just copy API_KEY
diff --git a/application/wsgi.py b/application/wsgi.py
index a65465e..6b8b4d0 100644
--- a/application/wsgi.py
+++ b/application/wsgi.py
@@ -1,4 +1,4 @@
 from app import app
 
 if __name__ == "__main__":
-    app.run(debug=True, port=5001)
+    app.run(debug=True, port=7091)
diff --git a/docker-compose-azure.yaml b/docker-compose-azure.yaml
index 773196d..a015eef 100644
--- a/docker-compose-azure.yaml
+++ b/docker-compose-azure.yaml
@@ -4,7 +4,7 @@ services:
   frontend:
     build: ./frontend
     environment:
-      - VITE_API_HOST=http://localhost:5001
+      - VITE_API_HOST=http://localhost:7091
       - VITE_API_STREAMING=$VITE_API_STREAMING
     ports:
       - "5173:5173"
@@ -25,7 +25,7 @@ services:
       - AZURE_DEPLOYMENT_NAME=$AZURE_DEPLOYMENT_NAME
       - AZURE_EMBEDDINGS_DEPLOYMENT_NAME=$AZURE_EMBEDDINGS_DEPLOYMENT_NAME
     ports:
-      - "5001:5001"
+      - "7091:7091"
     volumes:
       - ./application/indexes:/app/indexes
       - ./application/inputs:/app/inputs
@@ -43,7 +43,7 @@ services:
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
-      - API_URL=http://backend:5001
+      - API_URL=http://backend:7091
       - OPENAI_API_KEY=$OPENAI_API_KEY
       - OPENAI_API_BASE=$OPENAI_API_BASE
       - OPENAI_API_VERSION=$OPENAI_API_VERSION
diff --git a/docker-compose.yaml b/docker-compose.yaml
index c06b61b..a8917af 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -4,7 +4,7 @@ services:
   frontend:
     build: ./frontend
     environment:
-      - VITE_API_HOST=http://localhost:5001
+      - VITE_API_HOST=http://localhost:7091
       - VITE_API_STREAMING=$VITE_API_STREAMING
     ports:
       - "5173:5173"
@@ -20,14 +20,14 @@ services:
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
     ports:
-      - "5001:5001"
+      - "7091:7091"
     volumes:
       - ./application/indexes:/app/indexes
       - ./application/inputs:/app/inputs
       - ./application/vectors:/app/vectors
     depends_on:
-        - redis
-        - mongo
+      - redis
+      - mongo
 
   worker:
     build: ./application
@@ -38,10 +38,10 @@ services:
       - CELERY_BROKER_URL=redis://redis:6379/0
       - CELERY_RESULT_BACKEND=redis://redis:6379/1
       - MONGO_URI=mongodb://mongo:27017/docsgpt
-      - API_URL=http://backend:5001
+      - API_URL=http://backend:7091
     depends_on:
-        - redis
-        - mongo
+      - redis
+      - mongo
 
   redis:
     image: redis:6-alpine
@@ -55,7 +55,5 @@ services:
     volumes:
       - mongodb_data_container:/data/db
 
-
-
 volumes:
-  mongodb_data_container:
\ No newline at end of file
+  mongodb_data_container:
diff --git a/extensions/chrome/popup.js b/extensions/chrome/popup.js
index 8f6c4de..20f7bce 100644
--- a/extensions/chrome/popup.js
+++ b/extensions/chrome/popup.js
@@ -21,7 +21,7 @@ document.getElementById("message-form").addEventListener("submit", function(even
     }
 
     // send post request to server http://127.0.0.1:5000/ with message in json body
-    fetch('http://127.0.0.1:5001/api/answer', {
+    fetch('http://127.0.0.1:7091/api/answer', {
       method: 'POST',
       headers: {
         'Content-Type': 'application/json',
diff --git a/extensions/discord/bot.py b/extensions/discord/bot.py
index 10d6646..3cb1d1e 100644
--- a/extensions/discord/bot.py
+++ b/extensions/discord/bot.py
@@ -11,7 +11,7 @@ dotenv.load_dotenv()
 # Replace 'YOUR_BOT_TOKEN' with your bot's token
 TOKEN = os.getenv("DISCORD_TOKEN")
 PREFIX = '@DocsGPT'
-BASE_API_URL = 'http://localhost:5001'
+BASE_API_URL = 'http://localhost:7091'
 
 intents = discord.Intents.default()
 intents.message_content = True
diff --git a/extensions/web-widget/src/js/script.js b/extensions/web-widget/src/js/script.js
index 56c9fe9..79bc729 100644
--- a/extensions/web-widget/src/js/script.js
+++ b/extensions/web-widget/src/js/script.js
@@ -1,4 +1,4 @@
-const API_ENDPOINT = "http://localhost:5001/api/answer"; // Replace with your API endpoint
+const API_ENDPOINT = "http://localhost:7091/api/answer"; // Replace with your API endpoint
 
 const widgetInitMessage = document.getElementById("docsgpt-init-message");
 const widgetAnswerMessage = document.getElementById("docsgpt-answer");
diff --git a/frontend/.env.development b/frontend/.env.development
index 1b37f20..b09c468 100644
--- a/frontend/.env.development
+++ b/frontend/.env.development
@@ -1,2 +1,2 @@
 # Please put appropriate value
-VITE_API_HOST=http://localhost:5001
\ No newline at end of file
+VITE_API_HOST=http://localhost:7091
\ No newline at end of file
diff --git a/frontend/src/conversation/conversationApi.ts b/frontend/src/conversation/conversationApi.ts
index 4bcbe05..f68d278 100644
--- a/frontend/src/conversation/conversationApi.ts
+++ b/frontend/src/conversation/conversationApi.ts
@@ -91,22 +91,64 @@ export function fetchAnswerSteaming(
   });
 
   return new Promise<Answer>((resolve, reject) => {
-    const url = new URL(apiHost + '/stream');
-    url.searchParams.append('question', question);
-    url.searchParams.append('api_key', apiKey);
-    url.searchParams.append('embeddings_key', apiKey);
-    url.searchParams.append('active_docs', docPath);
-    url.searchParams.append('history', JSON.stringify(history));
+    const body = {
+      question: question,
+      api_key: apiKey,
+      embeddings_key: apiKey,
+      active_docs: docPath,
+      history: JSON.stringify(history),
+    };
+  
+    fetch(apiHost + '/stream', {
+      method: 'POST',
+      headers: {
+        'Content-Type': 'application/json',
+      },
+      body: JSON.stringify(body),
+    })
+      .then((response) => {
+        if (!response.body) throw Error("No response body");
+  
+        const reader = response.body.getReader();
+        const decoder = new TextDecoder('utf-8');
+        var counterrr = 0
+        const processStream = ({ done, value }: ReadableStreamReadResult<Uint8Array>) => {
+          if (done) {
+            console.log(counterrr);
+            return;
+          }
 
-    const eventSource = new EventSource(url.href);
+          counterrr += 1;
+          
+          const chunk = decoder.decode(value);
 
-    eventSource.onmessage = onEvent;
+          const lines = chunk.split("\n");
 
-    eventSource.onerror = (error) => {
-      console.log('Connection failed.');
-      eventSource.close();
-    };
-  });
+          for (let line of lines) {
+            if (line.trim() == "") {
+              continue;
+            }
+            if (line.startsWith('data:')) {
+              line = line.substring(5);
+            }
+            
+            const messageEvent: MessageEvent = new MessageEvent("message", {
+              data: line,
+            });
+
+            onEvent(messageEvent); // handle each message
+          }
+  
+          reader.read().then(processStream).catch(reject);
+        }
+  
+        reader.read().then(processStream).catch(reject);
+      })
+      .catch((error) => {
+        console.error('Connection failed:', error);
+        reject(error);
+      });
+  });  
 }
 
 export function sendFeedback(
diff --git a/setup.sh b/setup.sh
index 168ce87..cd5712b 100755
--- a/setup.sh
+++ b/setup.sh
@@ -15,7 +15,7 @@ docker run -d --name redis -p 6379:6379 redis:6-alpine
 docker run -d --name mongo -p 27017:27017 -v mongodb_data_container:/data/db mongo:6
 
 # Run backend and worker services
-docker run -d --name backend -p 5001:5001 \
+docker run -d --name backend -p 7091:7091 \
   --link redis:redis --link mongo:mongo \
   -v $(pwd)/application/indexes:/app/indexes \
   -v $(pwd)/application/inputs:/app/inputs \
@@ -34,12 +34,12 @@ docker run -d --name worker \
   -e CELERY_BROKER_URL=redis://redis:6379/0 \
   -e CELERY_RESULT_BACKEND=redis://redis:6379/1 \
   -e MONGO_URI=mongodb://mongo:27017/docsgpt \
-  -e API_URL=http://backend:5001 \
+  -e API_URL=http://backend:7091 \
   backend_image \
   celery -A app.celery worker -l INFO
 
 # Run frontend service
 docker run -d --name frontend -p 5173:5173 \
-  -e VITE_API_HOST=http://localhost:5001 \
+  -e VITE_API_HOST=http://localhost:7091 \
   frontend_image
 

From 897b4ef2cdba3abe49cd99c99ce556b0df780802 Mon Sep 17 00:00:00 2001
From: Idan <ghsijns@gmail.com>
Date: Fri, 23 Jun 2023 14:57:29 +0300
Subject: [PATCH 2/3] Fixed a bug with reading md files

---
 scripts/parser/file/markdown_parser.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/scripts/parser/file/markdown_parser.py b/scripts/parser/file/markdown_parser.py
index d8aeb3b..2b4223d 100644
--- a/scripts/parser/file/markdown_parser.py
+++ b/scripts/parser/file/markdown_parser.py
@@ -119,8 +119,12 @@ class MarkdownParser(BaseParser):
             self, filepath: Path, errors: str = "ignore"
     ) -> List[Tuple[Optional[str], str]]:
         """Parse file into tuples."""
-        with open(filepath, "r") as f:
-            content = f.read()
+        with open(filepath, "r", encoding='utf8') as f:
+            try:
+                content = f.read()
+            except (Exception,) as e:
+                print(f'Error a file: "{filepath}"')
+                raise e
         if self._remove_hyperlinks:
             content = self.remove_hyperlinks(content)
         if self._remove_images:

From c216bea031e8e44b41705c1475adcd213b2997b3 Mon Sep 17 00:00:00 2001
From: Alex <a@tushynski.me>
Date: Sun, 25 Jun 2023 10:51:45 +0700
Subject: [PATCH 3/3] Update app.py

---
 application/app.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/application/app.py b/application/app.py
index 7f84030..209fcf6 100644
--- a/application/app.py
+++ b/application/app.py
@@ -43,7 +43,10 @@ from worker import ingest_worker
 # os.environ["LANGCHAIN_HANDLER"] = "langchain"
 
 logger = logging.getLogger(__name__)
-gpt_model = 'gpt-3.5-turbo' # gpt-4
+if settings.LLM_NAME == "gpt4":
+    gpt_model = 'gpt-4'
+else:
+    gpt_model = 'gpt-3.5-turbo'
 
 if settings.LLM_NAME == "manifest":
     from manifest import Manifest