2022-12-04 00:42:59 +00:00
|
|
|
[tool.poetry]
|
|
|
|
name = "langchain"
|
2023-07-03 16:03:55 +00:00
|
|
|
version = "0.0.222"
|
2022-12-04 00:42:59 +00:00
|
|
|
description = "Building applications with LLMs through composability"
|
|
|
|
authors = []
|
|
|
|
license = "MIT"
|
|
|
|
readme = "README.md"
|
|
|
|
repository = "https://www.github.com/hwchase17/langchain"
|
|
|
|
|
2023-01-27 01:38:13 +00:00
|
|
|
[tool.poetry.scripts]
|
|
|
|
langchain-server = "langchain.server:main"
|
|
|
|
|
2022-12-04 00:42:59 +00:00
|
|
|
[tool.poetry.dependencies]
|
|
|
|
python = ">=3.8.1,<4.0"
|
|
|
|
pydantic = "^1"
|
2023-05-05 03:46:38 +00:00
|
|
|
SQLAlchemy = ">=1.4,<3"
|
2022-12-04 00:42:59 +00:00
|
|
|
requests = "^2"
|
2023-03-17 00:05:36 +00:00
|
|
|
PyYAML = ">=5.4.1"
|
2022-12-04 00:42:59 +00:00
|
|
|
numpy = "^1"
|
2023-04-27 16:02:39 +00:00
|
|
|
azure-core = {version = "^1.26.4", optional=true}
|
2023-04-20 03:48:47 +00:00
|
|
|
tqdm = {version = ">=4.48.0", optional = true}
|
2023-04-06 05:19:09 +00:00
|
|
|
openapi-schema-pydantic = "^1.2"
|
2022-12-04 00:42:59 +00:00
|
|
|
faiss-cpu = {version = "^1", optional = true}
|
|
|
|
wikipedia = {version = "^1", optional = true}
|
|
|
|
elasticsearch = {version = "^8", optional = true}
|
2023-02-21 02:39:34 +00:00
|
|
|
opensearch-py = {version = "^2.0.0", optional = true}
|
2022-12-22 17:31:27 +00:00
|
|
|
redis = {version = "^4", optional = true}
|
2022-12-04 00:42:59 +00:00
|
|
|
manifest-ml = {version = "^0.0.1", optional = true}
|
|
|
|
spacy = {version = "^3", optional = true}
|
|
|
|
nltk = {version = "^3", optional = true}
|
|
|
|
transformers = {version = "^4", optional = true}
|
2022-12-07 05:55:02 +00:00
|
|
|
beautifulsoup4 = {version = "^4", optional = true}
|
2023-04-14 04:38:49 +00:00
|
|
|
torch = {version = ">=1,<3", optional = true}
|
2022-12-19 21:40:39 +00:00
|
|
|
jinja2 = {version = "^3", optional = true}
|
2023-03-23 02:39:57 +00:00
|
|
|
tiktoken = {version = "^0.3.2", optional = true, python="^3.9"}
|
2022-12-21 03:24:08 +00:00
|
|
|
pinecone-client = {version = "^2", optional = true}
|
2023-04-12 04:32:17 +00:00
|
|
|
pinecone-text = {version = "^0.4.2", optional = true}
|
2023-05-30 14:59:01 +00:00
|
|
|
pymongo = {version = "^4.3.3", optional = true}
|
2023-04-22 16:17:38 +00:00
|
|
|
clickhouse-connect = {version="^0.5.14", optional=true}
|
2022-12-21 03:24:08 +00:00
|
|
|
weaviate-client = {version = "^3", optional = true}
|
2022-12-30 13:06:57 +00:00
|
|
|
google-api-python-client = {version = "2.70.0", optional = true}
|
2023-05-30 23:25:22 +00:00
|
|
|
google-auth = {version = "^2.18.1", optional = true}
|
2023-01-11 13:52:19 +00:00
|
|
|
wolframalpha = {version = "5.0.0", optional = true}
|
2023-04-15 00:22:01 +00:00
|
|
|
anthropic = {version = "^0.2.6", optional = true}
|
2023-04-07 19:24:32 +00:00
|
|
|
qdrant-client = {version = "^1.1.2", optional = true, python = ">=3.8.1,<3.12"}
|
2023-01-27 01:38:13 +00:00
|
|
|
dataclasses-json = "^0.5.7"
|
2023-01-31 08:00:08 +00:00
|
|
|
tensorflow-text = {version = "^2.11.0", optional = true, python = "^3.10, <3.12"}
|
2023-02-03 03:56:26 +00:00
|
|
|
tenacity = "^8.1.0"
|
2023-02-10 07:19:19 +00:00
|
|
|
cohere = {version = "^3", optional = true}
|
|
|
|
openai = {version = "^0", optional = true}
|
|
|
|
nlpcloud = {version = "^1", optional = true}
|
2023-02-27 06:11:38 +00:00
|
|
|
nomic = {version = "^1.0.43", optional = true}
|
2023-02-10 07:19:19 +00:00
|
|
|
huggingface_hub = {version = "^0", optional = true}
|
2023-06-29 06:04:11 +00:00
|
|
|
octoai-sdk = {version = "^0.1.1", optional = true}
|
2023-03-28 15:16:17 +00:00
|
|
|
jina = {version = "^3.14", optional = true}
|
2023-02-10 07:19:19 +00:00
|
|
|
google-search-results = {version = "^2", optional = true}
|
|
|
|
sentence-transformers = {version = "^2", optional = true}
|
2023-02-08 05:21:57 +00:00
|
|
|
aiohttp = "^3.8.3"
|
2023-04-19 23:53:34 +00:00
|
|
|
arxiv = {version = "^1.4", optional = true}
|
2023-02-10 18:07:26 +00:00
|
|
|
pypdf = {version = "^3.4.0", optional = true}
|
2023-02-13 07:01:26 +00:00
|
|
|
networkx = {version="^2.6.3", optional = true}
|
2023-02-25 16:48:02 +00:00
|
|
|
aleph-alpha-client = {version="^2.15.0", optional = true}
|
2023-06-17 00:53:55 +00:00
|
|
|
deeplake = {version = "^3.6.2", optional = true}
|
2023-03-15 04:13:58 +00:00
|
|
|
pgvector = {version = "^0.1.6", optional = true}
|
|
|
|
psycopg2-binary = {version = "^2.9.5", optional = true}
|
2023-03-28 19:02:14 +00:00
|
|
|
pyowm = {version = "^3.3.0", optional = true}
|
2023-04-06 19:54:32 +00:00
|
|
|
async-timeout = {version = "^4.0.0", python = "<3.11"}
|
2023-04-22 15:24:48 +00:00
|
|
|
azure-identity = {version = "^1.12.0", optional=true}
|
2023-04-12 21:16:58 +00:00
|
|
|
gptcache = {version = ">=0.1.7", optional = true}
|
2023-04-18 03:23:45 +00:00
|
|
|
atlassian-python-api = {version = "^3.36.0", optional=true}
|
|
|
|
pytesseract = {version = "^0.3.10", optional=true}
|
|
|
|
html2text = {version="^2020.1.16", optional=true}
|
2023-04-16 15:50:32 +00:00
|
|
|
numexpr = "^2.8.4"
|
2023-06-20 05:47:39 +00:00
|
|
|
duckduckgo-search = {version="^3.8.3", optional=true}
|
2023-04-25 05:15:12 +00:00
|
|
|
azure-cosmos = {version="^4.4.0b1", optional=true}
|
2023-04-27 15:36:00 +00:00
|
|
|
lark = {version="^1.1.5", optional=true}
|
2023-04-27 15:14:36 +00:00
|
|
|
lancedb = {version = "^0.1", optional = true}
|
2023-04-27 18:39:01 +00:00
|
|
|
pexpect = {version = "^4.8.0", optional = true}
|
2023-04-29 02:48:43 +00:00
|
|
|
pyvespa = {version = "^0.33.0", optional = true}
|
2023-05-04 05:55:34 +00:00
|
|
|
O365 = {version = "^2.0.26", optional = true}
|
2023-05-05 21:48:13 +00:00
|
|
|
jq = {version = "^1.4.1", optional = true}
|
2023-05-12 17:35:01 +00:00
|
|
|
steamship = {version = "^2.16.9", optional = true}
|
2023-05-10 13:35:07 +00:00
|
|
|
pdfminer-six = {version = "^20221105", optional = true}
|
2023-05-22 16:48:09 +00:00
|
|
|
docarray = {version="^0.32.0", extras=["hnswlib"], optional=true}
|
2023-05-15 14:53:00 +00:00
|
|
|
lxml = {version = "^4.9.2", optional = true}
|
2023-05-15 18:21:05 +00:00
|
|
|
pymupdf = {version = "^1.22.3", optional = true}
|
|
|
|
pypdfium2 = {version = "^4.10.0", optional = true}
|
2023-05-15 21:06:12 +00:00
|
|
|
gql = {version = "^3.4.1", optional = true}
|
2023-05-16 21:35:25 +00:00
|
|
|
pandas = {version = "^2.0.1", optional = true}
|
|
|
|
telethon = {version = "^1.28.5", optional = true}
|
2023-05-22 14:31:48 +00:00
|
|
|
neo4j = {version = "^5.8.1", optional = true}
|
2023-06-30 21:24:26 +00:00
|
|
|
zep-python = {version=">=0.32", optional=true}
|
2023-05-23 03:29:47 +00:00
|
|
|
langkit = {version = ">=0.0.1.dev3, <0.1.0", optional = true}
|
2023-05-18 13:55:14 +00:00
|
|
|
chardet = {version="^5.1.0", optional=true}
|
2023-05-19 22:27:50 +00:00
|
|
|
requests-toolbelt = {version = "^1.0.0", optional = true}
|
2023-05-23 01:09:53 +00:00
|
|
|
openlm = {version = "^0.0.5", optional = true}
|
2023-05-24 17:02:09 +00:00
|
|
|
scikit-learn = {version = "^1.2.2", optional = true}
|
2023-05-23 13:45:48 +00:00
|
|
|
azure-ai-formrecognizer = {version = "^3.2.1", optional = true}
|
|
|
|
azure-ai-vision = {version = "^0.11.1b1", optional = true}
|
|
|
|
azure-cognitiveservices-speech = {version = "^1.28.0", optional = true}
|
2023-05-30 02:47:56 +00:00
|
|
|
py-trello = {version = "^0.19.0", optional = true}
|
2023-05-26 02:13:21 +00:00
|
|
|
momento = {version = "^1.5.0", optional = true}
|
2023-05-25 07:21:31 +00:00
|
|
|
bibtexparser = {version = "^1.4.0", optional = true}
|
2023-06-20 05:08:58 +00:00
|
|
|
singlestoredb = {version = "^0.7.1", optional = true}
|
2023-05-30 03:23:17 +00:00
|
|
|
pyspark = {version = "^3.4.0", optional = true}
|
2023-06-22 15:00:15 +00:00
|
|
|
clarifai = {version = "9.1.0", optional = true}
|
2023-06-06 03:39:16 +00:00
|
|
|
tigrisdb = {version = "^1.0.0b6", optional = true}
|
2023-06-08 04:56:43 +00:00
|
|
|
nebula3-python = {version = "^3.4.0", optional = true}
|
2023-06-23 08:11:01 +00:00
|
|
|
langchainplus-sdk = ">=0.0.17"
|
2023-06-16 23:50:01 +00:00
|
|
|
awadb = {version = "^0.3.3", optional = true}
|
2023-06-12 04:15:42 +00:00
|
|
|
azure-search-documents = {version = "11.4.0a20230509004", source = "azure-sdk-dev", optional = true}
|
2023-06-27 22:58:47 +00:00
|
|
|
esprima = {version = "^4.0.1", optional = true}
|
2023-06-30 07:54:23 +00:00
|
|
|
openllm = {version = ">=0.1.19", optional = true}
|
2023-06-22 20:14:28 +00:00
|
|
|
streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
|
2023-06-28 05:26:38 +00:00
|
|
|
psychicapi = {version = "^0.8.0", optional = true}
|
2023-07-01 18:09:52 +00:00
|
|
|
cassio = {version = "^0.0.7", optional = true}
|
2023-06-18 23:55:18 +00:00
|
|
|
|
2023-01-05 05:39:50 +00:00
|
|
|
[tool.poetry.group.docs.dependencies]
|
|
|
|
autodoc_pydantic = "^1.8.0"
|
|
|
|
myst_parser = "^0.18.1"
|
|
|
|
nbsphinx = "^0.8.9"
|
|
|
|
sphinx = "^4.5.0"
|
|
|
|
sphinx-autobuild = "^2021.3.14"
|
|
|
|
sphinx_book_theme = "^0.3.3"
|
|
|
|
sphinx_rtd_theme = "^1.0.0"
|
|
|
|
sphinx-typlog-theme = "^0.8.0"
|
|
|
|
sphinx-panels = "^0.6.0"
|
|
|
|
toml = "^0.10.2"
|
|
|
|
myst-nb = "^0.17.1"
|
|
|
|
linkchecker = "^10.2.1"
|
2023-03-13 04:15:07 +00:00
|
|
|
sphinx-copybutton = "^0.5.1"
|
2022-12-04 00:42:59 +00:00
|
|
|
|
|
|
|
[tool.poetry.group.test.dependencies]
|
2023-05-16 18:48:56 +00:00
|
|
|
# The only dependencies that should be added are
|
|
|
|
# dependencies used for running tests (e.g., pytest, freezegun, responses).
|
|
|
|
# Any dependencies that do not meet those criteria will be removed.
|
2023-04-12 03:45:36 +00:00
|
|
|
pytest = "^7.3.0"
|
2022-12-13 13:48:53 +00:00
|
|
|
pytest-cov = "^4.0.0"
|
2022-12-04 00:42:59 +00:00
|
|
|
pytest-dotenv = "^0.5.2"
|
2023-03-17 04:55:35 +00:00
|
|
|
duckdb-engine = "^0.7.0"
|
2022-12-28 22:13:08 +00:00
|
|
|
pytest-watcher = "^0.2.6"
|
2023-01-27 01:38:13 +00:00
|
|
|
freezegun = "^1.2.2"
|
2023-01-30 22:52:17 +00:00
|
|
|
responses = "^0.22.0"
|
2023-02-08 05:21:57 +00:00
|
|
|
pytest-asyncio = "^0.20.3"
|
2023-04-27 20:42:12 +00:00
|
|
|
lark = "^1.1.5"
|
2023-06-18 23:55:18 +00:00
|
|
|
pandas = "^2.0.0"
|
2023-04-29 04:54:24 +00:00
|
|
|
pytest-mock = "^3.10.0"
|
2023-05-16 18:41:24 +00:00
|
|
|
pytest-socket = "^0.6.0"
|
2023-06-11 22:51:28 +00:00
|
|
|
syrupy = "^4.0.2"
|
2022-12-04 00:42:59 +00:00
|
|
|
|
2023-04-05 13:51:32 +00:00
|
|
|
[tool.poetry.group.test_integration]
|
|
|
|
optional = true
|
|
|
|
|
|
|
|
[tool.poetry.group.test_integration.dependencies]
|
2023-05-26 17:29:07 +00:00
|
|
|
# Do not add dependencies in the test_integration group
|
|
|
|
# Instead:
|
|
|
|
# 1. Add an optional dependency to the main group
|
|
|
|
# poetry add --optional [package name]
|
|
|
|
# 2. Add the package name to the extended_testing extra (find it below)
|
|
|
|
# 3. Relock the poetry file
|
|
|
|
# poetry lock --no-update
|
2023-06-22 20:14:28 +00:00
|
|
|
# 4. Favor unit tests not integration tests.
|
2023-05-26 17:29:07 +00:00
|
|
|
# Use the @pytest.mark.requires(pkg_name) decorator in unit_tests.
|
|
|
|
# Your tests should not rely on network access, as it prevents other
|
|
|
|
# developers from being able to easily run them.
|
|
|
|
# Instead write unit tests that use the `responses` library or mock.patch with
|
|
|
|
# fixtures. Keep the fixtures minimal.
|
|
|
|
# See CONTRIBUTING.md for more instructions on working with optional dependencies.
|
|
|
|
# https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md#working-with-optional-dependencies
|
2023-04-07 14:28:57 +00:00
|
|
|
pytest-vcr = "^1.0.2"
|
|
|
|
wrapt = "^1.15.0"
|
2023-04-12 03:45:36 +00:00
|
|
|
openai = "^0.27.4"
|
|
|
|
elasticsearch = {extras = ["async"], version = "^8.6.2"}
|
2023-04-08 03:43:53 +00:00
|
|
|
redis = "^4.5.4"
|
|
|
|
pinecone-client = "^2.2.1"
|
2023-04-12 04:32:17 +00:00
|
|
|
pinecone-text = "^0.4.2"
|
2023-05-30 14:59:01 +00:00
|
|
|
pymongo = "^4.3.3"
|
2023-04-22 16:17:38 +00:00
|
|
|
clickhouse-connect = "^0.5.14"
|
2023-04-08 03:43:53 +00:00
|
|
|
transformers = "^4.27.4"
|
|
|
|
deeplake = "^3.2.21"
|
2023-04-14 05:37:34 +00:00
|
|
|
weaviate-client = "^3.15.5"
|
2023-04-12 04:32:17 +00:00
|
|
|
torch = "^1.0.0"
|
2023-04-12 03:45:36 +00:00
|
|
|
chromadb = "^0.3.21"
|
|
|
|
tiktoken = "^0.3.3"
|
2023-04-14 04:49:31 +00:00
|
|
|
python-dotenv = "^1.0.0"
|
2023-04-24 01:25:20 +00:00
|
|
|
sentence-transformers = "^2"
|
2023-04-14 04:49:31 +00:00
|
|
|
gptcache = "^0.1.9"
|
|
|
|
promptlayer = "^0.1.80"
|
2023-04-29 04:25:33 +00:00
|
|
|
tair = "^1.3.3"
|
2023-05-09 17:08:39 +00:00
|
|
|
wikipedia = "^1"
|
2023-07-01 18:09:52 +00:00
|
|
|
cassio = "^0.0.7"
|
2023-05-12 05:48:38 +00:00
|
|
|
arxiv = "^1.4"
|
2023-05-22 23:43:07 +00:00
|
|
|
mastodon-py = "^1.8.1"
|
2023-05-26 02:13:21 +00:00
|
|
|
momento = "^1.5.0"
|
2023-05-26 17:29:07 +00:00
|
|
|
# Please do not add any dependencies in the test_integration group
|
|
|
|
# See instructions above ^^
|
2023-04-05 13:51:32 +00:00
|
|
|
|
2022-12-04 00:42:59 +00:00
|
|
|
[tool.poetry.group.lint.dependencies]
|
2023-02-25 16:59:52 +00:00
|
|
|
ruff = "^0.0.249"
|
2022-12-05 05:12:05 +00:00
|
|
|
types-toml = "^0.10.8.1"
|
2023-01-27 01:38:13 +00:00
|
|
|
types-redis = "^4.3.21.6"
|
2023-06-30 14:48:02 +00:00
|
|
|
types-pytz = "^2023.3.0.0"
|
2023-02-10 19:38:24 +00:00
|
|
|
black = "^23.1.0"
|
2023-05-18 13:55:14 +00:00
|
|
|
types-chardet = "^5.0.4.6"
|
2023-06-22 15:00:15 +00:00
|
|
|
mypy-protobuf = "^3.0.0"
|
2022-12-04 00:42:59 +00:00
|
|
|
|
|
|
|
[tool.poetry.group.typing.dependencies]
|
|
|
|
mypy = "^0.991"
|
|
|
|
types-pyyaml = "^6.0.12.2"
|
|
|
|
types-requests = "^2.28.11.5"
|
|
|
|
|
|
|
|
[tool.poetry.group.dev]
|
|
|
|
optional = true
|
|
|
|
|
|
|
|
[tool.poetry.group.dev.dependencies]
|
|
|
|
jupyter = "^1.0.0"
|
|
|
|
playwright = "^1.28.0"
|
2023-04-03 21:11:18 +00:00
|
|
|
setuptools = "^67.6.1"
|
2022-12-04 00:42:59 +00:00
|
|
|
|
|
|
|
[tool.poetry.extras]
|
2023-06-22 15:00:15 +00:00
|
|
|
llms = ["anthropic", "clarifai", "cohere", "openai", "openllm", "openlm", "nlpcloud", "huggingface_hub", "manifest-ml", "torch", "transformers"]
|
2023-04-04 13:48:21 +00:00
|
|
|
qdrant = ["qdrant-client"]
|
2023-05-11 19:21:06 +00:00
|
|
|
openai = ["openai", "tiktoken"]
|
2023-05-18 13:55:14 +00:00
|
|
|
text_helpers = ["chardet"]
|
2023-06-22 15:00:15 +00:00
|
|
|
clarifai = ["clarifai"]
|
2023-04-04 13:48:21 +00:00
|
|
|
cohere = ["cohere"]
|
2023-05-22 16:48:09 +00:00
|
|
|
docarray = ["docarray"]
|
2023-04-24 01:25:20 +00:00
|
|
|
embeddings = ["sentence-transformers"]
|
2023-06-27 22:58:47 +00:00
|
|
|
javascript = ["esprima"]
|
2023-06-12 04:15:42 +00:00
|
|
|
azure = [
|
|
|
|
"azure-identity",
|
|
|
|
"azure-cosmos",
|
|
|
|
"openai",
|
|
|
|
"azure-core",
|
|
|
|
"azure-ai-formrecognizer",
|
|
|
|
"azure-ai-vision",
|
|
|
|
"azure-cognitiveservices-speech",
|
|
|
|
"azure-search-documents",
|
|
|
|
]
|
2023-05-22 14:31:48 +00:00
|
|
|
all = [
|
|
|
|
"anthropic",
|
2023-06-22 15:00:15 +00:00
|
|
|
"clarifai",
|
2023-05-22 14:31:48 +00:00
|
|
|
"cohere",
|
|
|
|
"openai",
|
|
|
|
"nlpcloud",
|
|
|
|
"huggingface_hub",
|
|
|
|
"jina",
|
|
|
|
"manifest-ml",
|
|
|
|
"elasticsearch",
|
|
|
|
"opensearch-py",
|
|
|
|
"google-search-results",
|
|
|
|
"faiss-cpu",
|
|
|
|
"sentence-transformers",
|
|
|
|
"transformers",
|
|
|
|
"spacy",
|
|
|
|
"nltk",
|
|
|
|
"wikipedia",
|
|
|
|
"beautifulsoup4",
|
|
|
|
"tiktoken",
|
|
|
|
"torch",
|
|
|
|
"jinja2",
|
|
|
|
"pinecone-client",
|
|
|
|
"pinecone-text",
|
2023-05-30 14:59:01 +00:00
|
|
|
"pymongo",
|
2023-05-22 14:31:48 +00:00
|
|
|
"weaviate-client",
|
|
|
|
"redis",
|
|
|
|
"google-api-python-client",
|
2023-05-30 23:25:22 +00:00
|
|
|
"google-auth",
|
2023-05-22 14:31:48 +00:00
|
|
|
"wolframalpha",
|
|
|
|
"qdrant-client",
|
|
|
|
"tensorflow-text",
|
|
|
|
"pypdf",
|
|
|
|
"networkx",
|
|
|
|
"nomic",
|
|
|
|
"aleph-alpha-client",
|
|
|
|
"deeplake",
|
|
|
|
"pgvector",
|
|
|
|
"psycopg2-binary",
|
|
|
|
"pyowm",
|
|
|
|
"pytesseract",
|
|
|
|
"html2text",
|
|
|
|
"atlassian-python-api",
|
|
|
|
"gptcache",
|
|
|
|
"duckduckgo-search",
|
|
|
|
"arxiv",
|
|
|
|
"azure-identity",
|
|
|
|
"clickhouse-connect",
|
|
|
|
"azure-cosmos",
|
|
|
|
"lancedb",
|
2023-05-23 03:29:47 +00:00
|
|
|
"langkit",
|
2023-05-22 14:31:48 +00:00
|
|
|
"lark",
|
|
|
|
"pexpect",
|
|
|
|
"pyvespa",
|
|
|
|
"O365",
|
|
|
|
"jq",
|
|
|
|
"docarray",
|
|
|
|
"steamship",
|
|
|
|
"pdfminer-six",
|
|
|
|
"lxml",
|
|
|
|
"requests-toolbelt",
|
|
|
|
"neo4j",
|
2023-05-23 13:45:48 +00:00
|
|
|
"openlm",
|
|
|
|
"azure-ai-formrecognizer",
|
|
|
|
"azure-ai-vision",
|
|
|
|
"azure-cognitiveservices-speech",
|
2023-06-06 03:39:16 +00:00
|
|
|
"momento",
|
2023-06-08 03:45:33 +00:00
|
|
|
"singlestoredb",
|
2023-06-08 05:47:48 +00:00
|
|
|
"tigrisdb",
|
2023-06-08 04:56:43 +00:00
|
|
|
"nebula3-python",
|
Add a new vector store - AwaDB (#5971) (#5992)
Added AwaDB vector store, which is a wrapper over the AwaDB, that can be
used as a vector storage and has an efficient similarity search. Added
integration tests for the vector store
Added jupyter notebook with the example
Delete an unneeded empty file and resolve the
conflict(https://github.com/hwchase17/langchain/pull/5886)
Please check, Thanks!
@dev2049
@hwchase17
---------
<!--
Thank you for contributing to LangChain! Your PR will appear in our
release under the title you set. Please make sure it highlights your
valuable contribution.
Replace this with a description of the change, the issue it fixes (if
applicable), and relevant context. List any dependencies required for
this change.
After you're done, someone will review your PR. They may suggest
improvements. If no one reviews your PR within a few days, feel free to
@-mention the same people again, as notifications can get lost.
Finally, we'd love to show appreciation for your contribution - if you'd
like us to shout you out on Twitter, please also include your handle!
-->
<!-- Remove if not applicable -->
Fixes # (issue)
#### Before submitting
<!-- If you're adding a new integration, please include:
1. a test for the integration - favor unit tests that do not rely on
network access.
2. an example notebook showing its use
See contribution guidelines for more information on how to write tests,
lint
etc:
https://github.com/hwchase17/langchain/blob/master/.github/CONTRIBUTING.md
-->
#### Who can review?
Tag maintainers/contributors who might be interested:
<!-- For a quicker response, figure out the right person to tag with @
@hwchase17 - project lead
Tracing / Callbacks
- @agola11
Async
- @agola11
DataLoaders
- @eyurtsev
Models
- @hwchase17
- @agola11
Agents / Tools / Toolkits
- @vowelparrot
VectorStores / Retrievers / Memory
- @dev2049
-->
---------
Co-authored-by: ljeagle <vincent_jieli@yeah.net>
Co-authored-by: vincent <awadb.vincent@gmail.com>
2023-06-10 22:42:32 +00:00
|
|
|
"awadb",
|
2023-06-27 22:58:47 +00:00
|
|
|
"esprima",
|
2023-06-29 06:04:11 +00:00
|
|
|
"octoai-sdk",
|
2023-05-22 14:31:48 +00:00
|
|
|
]
|
2023-05-19 21:28:17 +00:00
|
|
|
|
2023-05-10 13:35:07 +00:00
|
|
|
# An extra used to be able to add extended testing.
|
2023-05-15 14:53:00 +00:00
|
|
|
# Please list one package per line to make it easier to add new packages without
|
|
|
|
# merge-conflicts
|
2023-05-12 18:50:08 +00:00
|
|
|
extended_testing = [
|
2023-05-17 18:11:26 +00:00
|
|
|
"beautifulsoup4",
|
2023-05-25 07:21:31 +00:00
|
|
|
"bibtexparser",
|
Cassandra support for chat history using CassIO library (#6771)
### Overview
This PR aims at building on #4378, expanding the capabilities and
building on top of the `cassIO` library to interface with the database
(as opposed to using the core drivers directly).
Usage of `cassIO` (a library abstracting Cassandra access for
ML/GenAI-specific purposes) is already established since #6426 was
merged, so no new dependencies are introduced.
In the same spirit, we try to uniform the interface for using Cassandra
instances throughout LangChain: all our appreciation of the work by
@jj701 notwithstanding, who paved the way for this incremental work
(thank you!), we identified a few reasons for changing the way a
`CassandraChatMessageHistory` is instantiated. Advocating a syntax
change is something we don't take lightly, so we add some
explanations about this below.
Additionally, this PR expands on integration testing, enables use of
Cassandra's native Time-to-Live (TTL) features and improves the phrasing
around the notebook example and the short "integrations" documentation
paragraph.
We would kindly request @hwchase to review (since this is an elaboration
and proposed improvement of #4378 who had the same reviewer).
### About the __init__ breaking changes
There are
[many](https://docs.datastax.com/en/developer/python-driver/3.28/api/cassandra/cluster/)
options when creating the `Cluster` object, and new ones might be added
at any time. Choosing some of them and exposing them as `__init__`
parameters `CassandraChatMessageHistory` will prove to be insufficient
for at least some users.
On the other hand, working through `kwargs` or adding a long, long list
of arguments to `__init__` is not a desirable option either. For this
reason, (as done in #6426), we propose that whoever instantiates the
Chat Message History class provide a Cassandra `Session` object, ready
to use. This also enables easier injection of mocks and usage of
Cassandra-compatible connections (such as those to the cloud database
DataStax Astra DB, obtained with a different set of init parameters than
`contact_points` and `port`).
We feel that a breaking change might still be acceptable since LangChain
is at `0.*`. However, while maintaining that the approach we propose
will be more flexible in the future, room could be made for a
"compatibility layer" that respects the current init method. Honestly,
we would do that only if there are strong reasons for it, as that would
entail an additional maintenance burden.
### Other changes
We propose to remove the keyspace creation from the class code for two
reasons: first, production Cassandra instances often employ RBAC so that
the database user reading/writing from tables does not necessarily (and
generally shouldn't) have permission to create keyspaces, and second
that programmatic keyspace creation is not a best practice (it should be
done more or less manually, with extra care about schema mismatches
among nodes, etc). Removing this (usually unnecessary) operation from
the `__init__` path would also improve initialization performance
(shorter time).
We suggest, likewise, to remove the `__del__` method (which would close
the database connection), for the following reason: it is the
recommended best practice to create a single Cassandra `Session` object
throughout an application (it is a resource-heavy object capable of
handling concurrency internally), so in case Cassandra is used in other
ways by the app there is the risk of truncating the connection for all
usages when the history instance is destroyed. Moreover, the `Session`
object, in typical applications, is best left to garbage-collect itself
automatically.
As mentioned above, we defer the actual database I/O to the `cassIO`
library, which is designed to encode practices optimized for LLM
applications (among other) without the need to expose LangChain
developers to the internals of CQL (Cassandra Query Language). CassIO is
already employed by the LangChain's Vector Store support for Cassandra.
We added a few more connection options in the companion notebook example
(most notably, Astra DB) to encourage usage by anyone who cannot run
their own Cassandra cluster.
We surface the `ttl_seconds` option for automatic handling of an
expiration time to chat history messages, a likely useful feature given
that very old messages generally may lose their importance.
We elaborated a bit more on the integration testing (Time-to-live,
separation of "session ids", ...).
### Remarks from linter & co.
We reinstated `cassio` as a dependency both in the "optional" group and
in the "integration testing" group of `pyproject.toml`. This might not
be the right thing to do, in which case the author of this PR offers his
apologies (lack of confidence with Poetry - happy to be pointed in the
right direction, though!).
During linter tests, we were hit by some errors which appear unrelated
to the code in the PR. We left them here and report on them here for
awareness:
```
langchain/vectorstores/mongodb_atlas.py:137: error: Argument 1 to "insert_many" of "Collection" has incompatible type "List[Dict[str, Sequence[object]]]"; expected "Iterable[Union[MongoDBDocumentType, RawBSONDocument]]" [arg-type]
langchain/vectorstores/mongodb_atlas.py:186: error: Argument 1 to "aggregate" of "Collection" has incompatible type "List[object]"; expected "Sequence[Mapping[str, Any]]" [arg-type]
langchain/vectorstores/qdrant.py:16: error: Name "grpc" is not defined [name-defined]
langchain/vectorstores/qdrant.py:19: error: Name "grpc" is not defined [name-defined]
langchain/vectorstores/qdrant.py:20: error: Name "grpc" is not defined [name-defined]
langchain/vectorstores/qdrant.py:22: error: Name "grpc" is not defined [name-defined]
langchain/vectorstores/qdrant.py:23: error: Name "grpc" is not defined [name-defined]
```
In the same spirit, we observe that to even get `import langchain` run,
it seems that a `pip install bs4` is missing from the minimal package
installation path.
Thank you!
2023-06-29 17:50:34 +00:00
|
|
|
"cassio",
|
2023-05-18 13:55:14 +00:00
|
|
|
"chardet",
|
2023-06-27 22:58:47 +00:00
|
|
|
"esprima",
|
2023-05-15 14:53:00 +00:00
|
|
|
"jq",
|
|
|
|
"pdfminer-six",  # same spelling as the key in [tool.poetry.dependencies]
|
2023-06-18 23:55:18 +00:00
|
|
|
"pgvector",
|
2023-05-15 14:53:00 +00:00
|
|
|
"pypdf",
|
2023-05-15 18:21:05 +00:00
|
|
|
"pymupdf",
|
|
|
|
"pypdfium2",
|
2023-05-15 14:53:00 +00:00
|
|
|
"tqdm",
|
|
|
|
"lxml",
|
2023-05-16 22:17:07 +00:00
|
|
|
"atlassian-python-api",
|
|
|
|
"beautifulsoup4",
|
2023-05-16 21:35:25 +00:00
|
|
|
"pandas",
|
|
|
|
"telethon",
|
2023-05-21 16:13:20 +00:00
|
|
|
"psychicapi",
|
2023-05-19 21:28:17 +00:00
|
|
|
"zep-python",
|
2023-05-19 22:27:50 +00:00
|
|
|
"gql",
|
|
|
|
"requests-toolbelt",  # same spelling as the key in [tool.poetry.dependencies]
|
2023-05-24 17:02:09 +00:00
|
|
|
"html2text",
|
2023-05-30 02:47:56 +00:00
|
|
|
"py-trello",
|
2023-05-24 17:02:09 +00:00
|
|
|
"scikit-learn",
|
2023-06-22 20:14:28 +00:00
|
|
|
"streamlit",
|
2023-06-11 22:51:28 +00:00
|
|
|
"pyspark",
|
|
|
|
"openai"
|
2023-05-12 18:50:08 +00:00
|
|
|
]
|
2022-12-04 00:42:59 +00:00
|
|
|
|
2023-06-12 04:15:42 +00:00
|
|
|
[[tool.poetry.source]]
|
|
|
|
name = "azure-sdk-dev"
|
|
|
|
url = "https://pkgs.dev.azure.com/azure-sdk/public/_packaging/azure-sdk-for-python/pypi/simple/"
|
|
|
|
secondary = true
|
|
|
|
|
2023-02-25 16:59:52 +00:00
|
|
|
[tool.ruff]
|
|
|
|
select = [
|
|
|
|
"E", # pycodestyle
|
|
|
|
"F", # pyflakes
|
|
|
|
"I", # isort
|
|
|
|
]
|
2023-04-21 17:47:57 +00:00
|
|
|
exclude = [
|
|
|
|
"tests/integration_tests/examples/non-utf8-encoding.py",
|
|
|
|
]
|
2022-10-24 21:51:15 +00:00
|
|
|
|
|
|
|
[tool.mypy]
|
|
|
|
# Suppress errors about imports that cannot be resolved.
ignore_missing_imports = true
|
|
|
|
# Reject function definitions that lack (complete) type annotations.
disallow_untyped_defs = true
|
2023-06-27 22:58:47 +00:00
|
|
|
exclude = ["notebooks", "examples", "example_data"]
|
2022-12-04 00:42:59 +00:00
|
|
|
|
2023-01-22 22:48:20 +00:00
|
|
|
[tool.coverage.run]
|
|
|
|
omit = [
|
|
|
|
"tests/*",
|
|
|
|
]
|
|
|
|
|
2022-12-04 00:42:59 +00:00
|
|
|
[build-system]
|
2023-04-25 01:19:51 +00:00
|
|
|
requires = ["poetry-core>=1.0.0"]
|
2022-12-04 00:42:59 +00:00
|
|
|
build-backend = "poetry.core.masonry.api"
|
2023-05-08 20:15:09 +00:00
|
|
|
|
|
|
|
[tool.pytest.ini_options]
|
|
|
|
# --strict-markers will raise errors on unknown marks.
|
|
|
|
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
|
|
|
|
#
|
|
|
|
# https://docs.pytest.org/en/7.1.x/reference/reference.html
|
|
|
|
# --strict-config any warnings encountered while parsing the `pytest`
|
|
|
|
# section of the configuration file raise errors.
|
2023-06-11 22:51:28 +00:00
|
|
|
#
|
|
|
|
# https://github.com/tophat/syrupy
|
|
|
|
# --snapshot-warn-unused Prints a warning on unused snapshots rather than failing the test suite.
|
|
|
|
addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused"
|
2023-05-08 20:15:09 +00:00
|
|
|
# Registering custom markers.
|
|
|
|
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
|
|
|
|
markers = [
|
|
|
|
"requires: mark tests as requiring a specific library"
|
|
|
|
]
|