chore: Update Docker build platforms for application and frontend, and optimise embedding import

pull/958/head
Alex 2 weeks ago
parent 4534cafd3f
commit 5c8133a810

@ -13,7 +13,6 @@ jobs:
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v3
@ -36,14 +35,13 @@ jobs:
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}
# Runs a single command using the runner's shell
- name: Build and push Docker images to docker.io and ghcr.io
uses: docker/build-push-action@v4
with:
file: './application/Dockerfile'
platforms: linux/amd64
platforms: linux/amd64,linux/arm64
context: ./application
push: true
tags: |
${{ secrets.DOCKER_USERNAME }}/docsgpt:latest
ghcr.io/${{ github.repository_owner }}/docsgpt:latest
ghcr.io/${{ github.repository_owner }}/docsgpt:latest

@ -8,11 +8,11 @@ on:
jobs:
deploy:
if: github.repository == 'arc53/DocsGPT'
runs-on: ubuntu-latest
permissions:
contents: read
packages: write
steps:
- uses: actions/checkout@v3
@ -40,7 +40,7 @@ jobs:
uses: docker/build-push-action@v4
with:
file: './frontend/Dockerfile'
platforms: linux/amd64
platforms: linux/amd64, linux/arm64
context: ./frontend
push: true
tags: |

@ -15,7 +15,6 @@ html2text==2020.1.16
javalang==0.13.0
langchain==0.1.4
langchain-openai==0.0.5
nltk==3.8.1
openapi3_parser==1.1.16
pandas==2.2.0
pydantic_settings==2.1.0

@ -1,6 +1,6 @@
from transformers import GPT2TokenizerFast
tokenizer = GPT2TokenizerFast.from_pretrained('gpt2')
tokenizer.model_max_length = 100000
def count_tokens(string):
return len(tokenizer(string)['input_ids'])

@ -45,10 +45,15 @@ class BaseVectorStore(ABC):
cohere_api_key=embeddings_key
)
elif embeddings_name == "huggingface_sentence-transformers/all-mpnet-base-v2":
embedding_instance = embeddings_factory[embeddings_name](
#model_name="./model/all-mpnet-base-v2",
model_kwargs={"device": "cpu"},
)
if os.path.exists("./model/all-mpnet-base-v2"):
embedding_instance = embeddings_factory[embeddings_name](
model_name="./model/all-mpnet-base-v2",
model_kwargs={"device": "cpu"},
)
else:
embedding_instance = embeddings_factory[embeddings_name](
model_kwargs={"device": "cpu"},
)
else:
embedding_instance = embeddings_factory[embeddings_name]()

@ -4,7 +4,6 @@ import string
import zipfile
from urllib.parse import urljoin
import nltk
import requests
from application.core.settings import settings
@ -14,13 +13,6 @@ from application.parser.open_ai_func import call_openai_api
from application.parser.schema.base import Document
from application.parser.token_func import group_split
try:
nltk.download("punkt", quiet=True)
nltk.download("averaged_perceptron_tagger", quiet=True)
except FileExistsError:
pass
# Define a function to extract metadata from a given filename.
def metadata_from_filename(title):
store = "/".join(title.split("/")[1:3])

Loading…
Cancel
Save