diff --git a/application/parser/file/bulk.py b/application/parser/file/bulk.py index 593681e..af17193 100644 --- a/application/parser/file/bulk.py +++ b/application/parser/file/bulk.py @@ -62,7 +62,6 @@ class SimpleDirectoryReader(BaseReader): file_extractor: Optional[Dict[str, BaseParser]] = None, num_files_limit: Optional[int] = None, file_metadata: Optional[Callable[[str], Dict]] = None, - chunk_size_max: int = 2048, ) -> None: """Initialize with parameters.""" super().__init__() diff --git a/scripts/code_docs_gen.py b/scripts/code_docs_gen.py index 50edf3f..c5c2d14 100644 --- a/scripts/code_docs_gen.py +++ b/scripts/code_docs_gen.py @@ -3,7 +3,7 @@ import json from pathlib import Path import dotenv -from langchain.llms import OpenAI +from langchain_community.llms import OpenAI from langchain.prompts import PromptTemplate dotenv.load_dotenv() diff --git a/scripts/old/ingest_rst.py b/scripts/old/ingest_rst.py index d086ae7..816ac6e 100644 --- a/scripts/old/ingest_rst.py +++ b/scripts/old/ingest_rst.py @@ -6,7 +6,7 @@ from pathlib import Path import dotenv import faiss import tiktoken -from langchain.embeddings import OpenAIEmbeddings +from langchain_openai import OpenAIEmbeddings from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores import FAISS diff --git a/scripts/old/ingest_rst_sphinx.py b/scripts/old/ingest_rst_sphinx.py index 132cb68..ddafda8 100644 --- a/scripts/old/ingest_rst_sphinx.py +++ b/scripts/old/ingest_rst_sphinx.py @@ -8,7 +8,7 @@ from pathlib import Path import dotenv import faiss import tiktoken -from langchain.embeddings import OpenAIEmbeddings +from langchain_openai import OpenAIEmbeddings from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores import FAISS from sphinx.cmd.build import main as sphinx_main diff --git a/scripts/parser/file/bulk.py b/scripts/parser/file/bulk.py index 8b5bd40..8f59819 100644 --- a/scripts/parser/file/bulk.py +++ b/scripts/parser/file/bulk.py @@ -61,7 +61,6 @@ class SimpleDirectoryReader(BaseReader): file_extractor: Optional[Dict[str, BaseParser]] = None, num_files_limit: Optional[int] = None, file_metadata: Optional[Callable[[str], Dict]] = None, - chunk_size_max: int = 2048, ) -> None: """Initialize with parameters.""" super().__init__() diff --git a/scripts/parser/open_ai_func.py b/scripts/parser/open_ai_func.py index 63b0663..91b9f69 100644 --- a/scripts/parser/open_ai_func.py +++ b/scripts/parser/open_ai_func.py @@ -1,8 +1,8 @@ import os import tiktoken -from langchain.embeddings import OpenAIEmbeddings -from langchain.vectorstores import FAISS +from langchain_openai import OpenAIEmbeddings +from langchain_community.vectorstores import FAISS from retry import retry diff --git a/scripts/parser/py2doc.py b/scripts/parser/py2doc.py index 3a8175d..d8f3988 100644 --- a/scripts/parser/py2doc.py +++ b/scripts/parser/py2doc.py @@ -3,7 +3,7 @@ import os from pathlib import Path import tiktoken -from langchain.llms import OpenAI +from langchain_community.llms import OpenAI from langchain.prompts import PromptTemplate diff --git a/scripts/requirements.txt b/scripts/requirements.txt index c1bcb63..4e4fe35 100644 --- a/scripts/requirements.txt +++ b/scripts/requirements.txt @@ -1,145 +1,22 @@ -aiodns==3.1.1 -aiohttp==3.9.1 -aiohttp-retry==2.8.3 -aiosignal==1.3.1 -amqp==5.2.0 -annotated-types==0.6.0 -anthropic==0.8.0 -anyio==4.2.0 -async-timeout==4.0.3 -attrs==23.1.0 -billiard==4.2.0 -blinker==1.7.0 -blobfile==2.1.1 -boto3==1.34.6 -botocore==1.34.6 -celery==5.3.6 -certifi==2023.11.17 -cffi==1.16.0 -chardet==5.2.0 -charset-normalizer==3.3.2 -click==8.1.7 -click-didyoumean==0.3.0 -click-plugins==1.1.1 -click-repl==0.3.0 -cryptography==41.0.7 -dataclasses-json==0.6.3 -decorator==5.1.1 -dill==0.3.7 -distro==1.8.0 -dnspython==2.4.2 +dataclasses_json==0.6.3 docx2txt==0.8 -ecdsa==0.18.0 -elastic-transport==8.11.0 -elasticsearch==8.11.1 -entrypoints==0.4 -faiss-cpu==1.7.4 -filelock==3.13.1 -Flask==3.0.0 -Flask-Cors==4.0.0 -frozenlist==1.4.1 -fsspec==2023.12.2 -geojson==2.5.0 -greenlet==3.0.3 -gunicorn==21.2.0 -h11==0.14.0 -httpcore==1.0.2 -httpx==0.26.0 -huggingface-hub==0.20.1 -humbug==0.3.2 -idna==3.6 -iniconfig==2.0.0 -itsdangerous==2.1.2 -Jinja2==3.1.2 -jmespath==1.0.1 -joblib==1.3.2 -jsonpatch==1.33 -jsonpointer==2.4 -kombu==5.3.4 -langchain==0.0.352 -langchain-community==0.0.6 -langchain-core==0.1.3 -langsmith==0.0.74 -lazy-object-proxy==1.10.0 -loguru==0.7.2 -lxml==4.9.4 -MarkupSafe==2.1.3 -marshmallow==3.20.1 -marshmallow-enum==1.5.1 -mpmath==1.3.0 -multidict==6.0.4 -multiprocess==0.70.15 -mypy-extensions==1.0.0 -networkx==3.2.1 +EbookLib==0.18 +escodegen==1.0.11 +esprima==4.0.1 +faiss_cpu==1.7.4 +html2text==2020.1.16 +javalang==0.13.0 +langchain==0.1.4 +langchain_community==0.0.16 +langchain-openai==0.0.5 nltk==3.8.1 -npx==0.1.1 -numcodecs==0.12.1 -numpy==1.26.2 -openai==1.6.1 -openapi-schema-validator==0.6.2 -openapi-spec-validator==0.6.0 -openapi3-parser==1.1.16 -packaging==23.2 -pathable==0.4.3 -pathos==0.3.1 -Pillow==10.1.0 -pluggy==1.3.0 -pox==0.3.3 -ppft==1.7.6.7 -prance==23.6.21.0 -prompt-toolkit==3.0.43 -pyasn1==0.5.1 -pycares==4.4.0 -pycparser==2.21 -pycryptodome==3.19.0 -pycryptodomex==3.19.0 -pydantic==2.5.3 -pydantic_core==2.14.6 -pydantic_settings==2.1.0 -PyJWT==2.8.0 -pymongo==4.6.1 -pyowm==3.3.0 +openapi_parser==0.2.6 +pandas==2.2.0 PyPDF2==3.0.1 -PySocks==1.7.1 -pytest==7.4.3 -python-dateutil==2.8.2 -python-dotenv==1.0.0 -python-jose==3.3.0 -python-liquid==1.10.2 -pytz==2023.3.post1 -PyYAML==6.0.1 -redis==5.0.1 -regex==2023.10.3 -requests==2.31.0 +python-dotenv==1.0.1 retry==0.9.2 -rfc3339-validator==0.1.4 -rpds-py==0.15.2 -rsa==4.9 -ruamel.yaml==0.18.5 -ruamel.yaml.clib==0.2.8 -s3transfer==0.10.0 -safetensors==0.4.1 -scikit-learn==1.3.2 -scipy==1.11.4 -sentence-transformers -sentencepiece==0.1.99 -six==1.16.0 -sniffio==1.3.0 -SQLAlchemy==2.0.23 -sympy==1.12 -tenacity==8.2.3 -threadpoolctl==3.2.0 -tiktoken -tokenizers==0.15.0 -torch==2.1.2 -torchvision==0.16.2 +Sphinx==7.2.6 +tiktoken==0.5.2 tqdm==4.66.1 -transformers==4.36.2 typer==0.9.0 -typing-inspect==0.9.0 -typing_extensions==4.9.0 -tzdata==2023.3 -vine==5.1.0 -wcwidth==0.2.12 -Werkzeug==3.0.1 -yarl==1.9.4 +unstructured==0.12.2