Fixing ingestion metadata grouping

feature/remote-loads
Pavel 3 months ago
parent 325a8889ab
commit 54d187a0ad

1
.gitignore vendored

@@ -172,3 +172,4 @@ application/vectors/
node_modules/
.vscode/settings.json
models/
model/

@@ -1,6 +1,6 @@
"""Base reader class."""
from abc import abstractmethod
from typing import Any, List, Iterator
from typing import Any, List
from langchain.docstore.document import Document as LCDocument
from application.parser.schema.base import Document

@@ -1,4 +1,4 @@
from langchain.document_loader import TelegramChatApiLoader, TelegramChatFileLoader
from langchain.document_loader import TelegramChatApiLoader
from application.parser.remote.base import BaseRemote
class TelegramChatApiRemote(BaseRemote):
@@ -8,4 +8,4 @@ class TelegramChatApiRemote(BaseRemote):
def parse_file(self, *args, **load_kwargs):
return text
return

@@ -124,7 +124,7 @@ def ingest_worker(self, directory, formats, name_job, filename, user):
}
def remote_worker(self, source_data, name_job, user, directory = 'temp', loader = 'url'):
sample = False
# sample = False
token_check = True
min_tokens = 150
max_tokens = 1250
@@ -155,10 +155,10 @@ def remote_worker(self, source_data, name_job, user, directory = 'temp', loader
if settings.VECTOR_STORE == "faiss":
files = {'file_faiss': open(full_path + '/index.faiss', 'rb'),
'file_pkl': open(full_path + '/index.pkl', 'rb')}
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
response = requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
requests.post(urljoin(settings.API_URL, "/api/upload_index"), files=files, data=file_data)
requests.get(urljoin(settings.API_URL, "/api/delete_old?path=" + full_path))
else:
response = requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
requests.post(urljoin(settings.API_URL, "/api/upload_index"), data=file_data)
shutil.rmtree(full_path)

Loading…
Cancel
Save