Fix packaging and imports, and introduce tests with pytest.

There are still issues with the Celery worker.
pull/300/head
Anton Larin 10 months ago
parent 9a393b4f74
commit 98a97f34f5

@ -0,0 +1,28 @@
# CI workflow: runs the pytest suite on every push and pull request.
name: Run python tests with pytest
on:
  - push
  - pull_request
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      # One job is started per interpreter version listed here.
      matrix:
        python-version:
          - "3.9"
          - "3.10"
          - "3.11"
    steps:
      - uses: actions/checkout@v3
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        # requirements.txt is installed only if it exists inside application/.
        run: |
          python -m pip install --upgrade pip
          pip install pytest
          cd application
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
      - name: Test with pytest
        # pytest is invoked from inside the application/ directory.
        run: |
          cd application
          pytest

@ -37,9 +37,9 @@ from langchain.schema import HumanMessage, AIMessage
from pymongo import MongoClient from pymongo import MongoClient
from werkzeug.utils import secure_filename from werkzeug.utils import secure_filename
from core.settings import settings from application.core.settings import settings
from error import bad_request from application.error import bad_request
from worker import ingest_worker from application.worker import ingest_worker
from bson.objectid import ObjectId from bson.objectid import ObjectId
# os.environ["LANGCHAIN_HANDLER"] = "langchain" # os.environ["LANGCHAIN_HANDLER"] = "langchain"

@ -3,7 +3,7 @@ from abc import abstractmethod
from typing import Any, List from typing import Any, List
from langchain.docstore.document import Document as LCDocument from langchain.docstore.document import Document as LCDocument
from parser.schema.base import Document from application.parser.schema.base import Document
class BaseReader: class BaseReader:

@ -3,15 +3,15 @@ import logging
from pathlib import Path from pathlib import Path
from typing import Callable, Dict, List, Optional, Union from typing import Callable, Dict, List, Optional, Union
from parser.file.base import BaseReader from application.parser.file.base import BaseReader
from parser.file.base_parser import BaseParser from application.parser.file.base_parser import BaseParser
from parser.file.docs_parser import DocxParser, PDFParser from application.parser.file.docs_parser import DocxParser, PDFParser
from parser.file.epub_parser import EpubParser from application.parser.file.epub_parser import EpubParser
from parser.file.html_parser import HTMLParser from application.parser.file.html_parser import HTMLParser
from parser.file.markdown_parser import MarkdownParser from application.parser.file.markdown_parser import MarkdownParser
from parser.file.rst_parser import RstParser from application.parser.file.rst_parser import RstParser
from parser.file.tabular_parser import PandasCSVParser from application.parser.file.tabular_parser import PandasCSVParser
from parser.schema.base import Document from application.parser.schema.base import Document
DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = { DEFAULT_FILE_EXTRACTOR: Dict[str, BaseParser] = {
".pdf": PDFParser(), ".pdf": PDFParser(),

@ -6,7 +6,7 @@ Contains parsers for docx, pdf files.
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict
from parser.file.base_parser import BaseParser from application.parser.file.base_parser import BaseParser
class PDFParser(BaseParser): class PDFParser(BaseParser):

@ -6,7 +6,7 @@ Contains parsers for epub files.
from pathlib import Path from pathlib import Path
from typing import Dict from typing import Dict
from parser.file.base_parser import BaseParser from application.parser.file.base_parser import BaseParser
class EpubParser(BaseParser): class EpubParser(BaseParser):

@ -7,7 +7,7 @@ import re
from pathlib import Path from pathlib import Path
from typing import Dict, Union from typing import Dict, Union
from parser.file.base_parser import BaseParser from application.parser.file.base_parser import BaseParser
class HTMLParser(BaseParser): class HTMLParser(BaseParser):

@ -8,7 +8,7 @@ from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union, cast from typing import Any, Dict, List, Optional, Tuple, Union, cast
import tiktoken import tiktoken
from parser.file.base_parser import BaseParser from application.parser.file.base_parser import BaseParser
class MarkdownParser(BaseParser): class MarkdownParser(BaseParser):

@ -7,7 +7,7 @@ import re
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union from typing import Any, Dict, List, Optional, Tuple, Union
from parser.file.base_parser import BaseParser from application.parser.file.base_parser import BaseParser
class RstParser(BaseParser): class RstParser(BaseParser):

@ -6,7 +6,7 @@ Contains parsers for tabular data files.
from pathlib import Path from pathlib import Path
from typing import Any, Dict, List, Union from typing import Any, Dict, List, Union
from parser.file.base_parser import BaseParser from application.parser.file.base_parser import BaseParser
class CSVParser(BaseParser): class CSVParser(BaseParser):

@ -2,7 +2,7 @@
from dataclasses import dataclass from dataclasses import dataclass
from langchain.docstore.document import Document as LCDocument from langchain.docstore.document import Document as LCDocument
from parser.schema.schema import BaseDocument from application.parser.schema.schema import BaseDocument
@dataclass @dataclass

@ -3,7 +3,7 @@ from math import ceil
from typing import List from typing import List
import tiktoken import tiktoken
from parser.schema.base import Document from application.parser.schema.base import Document
def separate_header_and_body(text): def separate_header_and_body(text):

@ -73,6 +73,7 @@ pymongo==4.3.3
pyowm==3.3.0 pyowm==3.3.0
PyPDF2==3.0.1 PyPDF2==3.0.1
PySocks==1.7.1 PySocks==1.7.1
pytest
python-dateutil==2.8.2 python-dateutil==2.8.2
python-dotenv==1.0.0 python-dotenv==1.0.0
python-jose==3.3.0 python-jose==3.3.0

@ -0,0 +1,37 @@
from application.app import get_vectorstore
# Test cases for get_vectorstore function
def test_no_active_docs():
    """get_vectorstore returns an empty string for an empty dict."""
    payload = {}
    resolved = get_vectorstore(payload)
    assert resolved == ""
def test_default_active_docs():
    """get_vectorstore returns an empty string when active_docs is "default"."""
    payload = {"active_docs": "default"}
    resolved = get_vectorstore(payload)
    assert resolved == ""
def test_local_default_active_docs():
    """get_vectorstore returns an empty string for "local/default"."""
    payload = {"active_docs": "local/default"}
    resolved = get_vectorstore(payload)
    assert resolved == ""
def test_local_custom_active_docs():
    """get_vectorstore prefixes a local custom index with "indexes/"."""
    payload = {"active_docs": "local/custom_index"}
    resolved = get_vectorstore(payload)
    assert resolved == "indexes/local/custom_index"
def test_remote_active_docs():
    """get_vectorstore prefixes a non-local index name with "vectors/"."""
    payload = {"active_docs": "remote_index"}
    resolved = get_vectorstore(payload)
    assert resolved == "vectors/remote_index"
def test_active_docs_not_in_data():
    """get_vectorstore returns an empty string when "active_docs" is absent."""
    payload = {"other_key": "value"}
    resolved = get_vectorstore(payload)
    assert resolved == ""
def test_multiple_slashes_in_active_docs():
    """get_vectorstore keeps the full multi-segment local path after "indexes/"."""
    payload = {"active_docs": "local/some/other/index"}
    resolved = get_vectorstore(payload)
    assert resolved == "indexes/local/some/other/index"

@ -7,11 +7,11 @@ from urllib.parse import urljoin
import nltk import nltk
import requests import requests
from core.settings import settings from application.core.settings import settings
from parser.file.bulk import SimpleDirectoryReader from application.parser.file.bulk import SimpleDirectoryReader
from parser.open_ai_func import call_openai_api from application.parser.open_ai_func import call_openai_api
from parser.schema.base import Document from application.parser.schema.base import Document
from parser.token_func import group_split from application.parser.token_func import group_split
try: try:
nltk.download('punkt', quiet=True) nltk.download('punkt', quiet=True)

@ -1,4 +1,4 @@
from app import app from application.app import app
if __name__ == "__main__": if __name__ == "__main__":
app.run(debug=True, port=7091) app.run(debug=True, port=7091)

@ -13,6 +13,7 @@ services:
backend: backend:
build: ./application build: ./application
working_dir: /application
environment: environment:
- API_KEY=$OPENAI_API_KEY - API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY - EMBEDDINGS_KEY=$OPENAI_API_KEY
@ -27,16 +28,17 @@ services:
ports: ports:
- "7091:7091" - "7091:7091"
volumes: volumes:
- ./application/indexes:/app/indexes - ./application/indexes:/application/indexes
- ./application/inputs:/app/inputs - ./application/inputs:/application/inputs
- ./application/vectors:/app/vectors - ./application/vectors:/application/vectors
depends_on: depends_on:
- redis - redis
- mongo - mongo
worker: worker:
build: ./application build: ./application
command: celery -A app.celery worker -l INFO working_dir: /application
command: celery -A application.app.celery worker -l INFO
environment: environment:
- API_KEY=$OPENAI_API_KEY - API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY - EMBEDDINGS_KEY=$OPENAI_API_KEY

@ -13,6 +13,7 @@ services:
backend: backend:
build: ./application build: ./application
working_dir: /application
environment: environment:
- API_KEY=$OPENAI_API_KEY - API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY - EMBEDDINGS_KEY=$OPENAI_API_KEY
@ -22,16 +23,17 @@ services:
ports: ports:
- "7091:7091" - "7091:7091"
volumes: volumes:
- ./application/indexes:/app/indexes - ./application/indexes:/application/indexes
- ./application/inputs:/app/inputs - ./application/inputs:/application/inputs
- ./application/vectors:/app/vectors - ./application/vectors:/application/vectors
depends_on: depends_on:
- redis - redis
- mongo - mongo
worker: worker:
build: ./application build: ./application
command: celery -A app.celery worker -l INFO working_dir: /application
command: celery -A application.app.celery worker -l INFO
environment: environment:
- API_KEY=$OPENAI_API_KEY - API_KEY=$OPENAI_API_KEY
- EMBEDDINGS_KEY=$OPENAI_API_KEY - EMBEDDINGS_KEY=$OPENAI_API_KEY

@ -110,8 +110,6 @@ tenacity==8.2.2
threadpoolctl==3.2.0 threadpoolctl==3.2.0
tiktoken==0.4.0 tiktoken==0.4.0
tokenizers==0.13.3 tokenizers==0.13.3
torch==2.0.1
torchvision==0.15.2
tqdm==4.65.0 tqdm==4.65.0
transformers==4.31.0 transformers==4.31.0
typer==0.9.0 typer==0.9.0

Loading…
Cancel
Save