mirror of
https://github.com/hwchase17/langchain
synced 2024-11-10 01:10:59 +00:00
8021d2a2ab
Thank you for contributing to LangChain! - Oracle AI Vector Search Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads and allows you to query data based on semantics, rather than keywords. One of the biggest benefits of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems. - Oracle AI Vector Search is designed for Artificial Intelligence (AI) workloads and allows you to query data based on semantics, rather than keywords. One of the biggest benefits of Oracle AI Vector Search is that semantic search on unstructured data can be combined with relational search on business data in one single system. This is not only powerful but also significantly more effective because you don't need to add a specialized vector database, eliminating the pain of data fragmentation between multiple systems. This pull request adds the following functionality: Oracle AI Vector Search: Vector Store; Oracle AI Vector Search: Document Loader; Oracle AI Vector Search: Document Splitter; Oracle AI Vector Search: Summary; Oracle AI Vector Search: Oracle Embeddings. - We have added unit tests and have our own local unit test suite which verifies all the code is correct. We have made sure to add guides for each of the components and one end-to-end guide that shows how the entire thing runs. - We have made sure that make format and make lint run clean. Additional guidelines: - Make sure optional dependencies are imported within a function. - Please do not add dependencies to pyproject.toml files (even optional ones) unless they are required for unit tests. - Most PRs should not touch more than one package. - Changes should be backwards compatible. 
- If you are adding something to community, do not re-import it in langchain. If no one reviews your PR within a few days, please @-mention one of baskaryan, efriis, eyurtsev, hwchase17. --------- Co-authored-by: skmishraoracle <shailendra.mishra@oracle.com> Co-authored-by: hroyofc <harichandan.roy@oracle.com> Co-authored-by: Bagatur <22008038+baskaryan@users.noreply.github.com> Co-authored-by: Bagatur <baskaryan@gmail.com>
343 lines
11 KiB
TOML
343 lines
11 KiB
TOML
[tool.poetry]
|
|
name = "langchain-community"
|
|
version = "0.0.36"
|
|
description = "Community contributed LangChain integrations."
|
|
authors = []
|
|
license = "MIT"
|
|
readme = "README.md"
|
|
repository = "https://github.com/langchain-ai/langchain"
|
|
|
|
[tool.poetry.dependencies]
|
|
python = ">=3.8.1,<4.0"
|
|
langchain-core = "^0.1.48"
|
|
SQLAlchemy = ">=1.4,<3"
|
|
requests = "^2"
|
|
PyYAML = ">=5.3"
|
|
numpy = "^1"
|
|
aiohttp = "^3.8.3"
|
|
tenacity = "^8.1.0"
|
|
dataclasses-json = ">= 0.5.7, < 0.7"
|
|
langsmith = "^0.1.0"
|
|
tqdm = {version = ">=4.48.0", optional = true}
|
|
openapi-pydantic = {version = "^0.3.2", optional = true}
|
|
faiss-cpu = {version = "^1", optional = true}
|
|
beautifulsoup4 = {version = "^4", optional = true}
|
|
jinja2 = {version = "^3", optional = true}
|
|
cohere = {version = "^4", optional = true}
|
|
openai = {version = "<2", optional = true}
|
|
arxiv = {version = "^1.4", optional = true}
|
|
pypdf = {version = "^3.4.0", optional = true}
|
|
aleph-alpha-client = {version="^2.15.0", optional = true}
|
|
gradientai = {version="^1.4.0", optional = true}
|
|
pgvector = {version = "^0.1.6", optional = true}
|
|
atlassian-python-api = {version = "^3.36.0", optional=true}
|
|
html2text = {version="^2020.1.16", optional=true}
|
|
numexpr = {version="^2.8.6", optional=true}
|
|
jq = {version = "^1.4.1", optional = true}
|
|
pdfminer-six = {version = "^20221105", optional = true}
|
|
lxml = {version = ">=4.9.3,<6.0", optional = true}
|
|
pymupdf = {version = "^1.22.3", optional = true}
|
|
rapidocr-onnxruntime = {version = "^1.3.2", optional = true, python = ">=3.8.1,<3.12"}
|
|
pypdfium2 = {version = "^4.10.0", optional = true}
|
|
gql = {version = "^3.4.1", optional = true}
|
|
pandas = {version = "^2.0.1", optional = true}
|
|
telethon = {version = "^1.28.5", optional = true}
|
|
chardet = {version="^5.1.0", optional=true}
|
|
requests-toolbelt = {version = "^1.0.0", optional = true}
|
|
scikit-learn = {version = "^1.2.2", optional = true}
|
|
py-trello = {version = "^0.19.0", optional = true}
|
|
bibtexparser = {version = "^1.4.0", optional = true}
|
|
pyspark = {version = "^3.4.0", optional = true}
|
|
mwparserfromhell = {version = "^0.6.4", optional = true}
|
|
mwxml = {version = "^0.3.3", optional = true}
|
|
esprima = {version = "^4.0.1", optional = true}
|
|
streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
|
|
psychicapi = {version = "^0.8.0", optional = true}
|
|
cassio = {version = "^0.1.6", optional = true}
|
|
sympy = {version = "^1.12", optional = true}
|
|
rapidfuzz = {version = "^3.1.1", optional = true}
|
|
jsonschema = {version = ">1", optional = true}
|
|
rank-bm25 = {version = "^0.2.2", optional = true}
|
|
geopandas = {version = "^0.13.1", optional = true}
|
|
gitpython = {version = "^3.1.32", optional = true}
|
|
feedparser = {version = "^6.0.10", optional = true}
|
|
newspaper3k = {version = "^0.2.8", optional = true}
|
|
xata = {version = "^1.0.0a7", optional = true}
|
|
xmltodict = {version = "^0.13.0", optional = true}
|
|
markdownify = {version = "^0.11.6", optional = true}
|
|
assemblyai = {version = "^0.17.0", optional = true}
|
|
sqlite-vss = {version = "^0.1.2", optional = true}
|
|
motor = {version = "^3.3.1", optional = true}
|
|
timescale-vector = {version = "^0.0.1", optional = true}
|
|
typer = {version= "^0.9.0", optional = true}
|
|
anthropic = {version = "^0.3.11", optional = true}
|
|
aiosqlite = {version = "^0.19.0", optional = true}
|
|
rspace_client = {version = "^2.5.0", optional = true}
|
|
upstash-redis = {version = "^0.15.0", optional = true}
|
|
google-cloud-documentai = {version = "^2.20.1", optional = true}
|
|
fireworks-ai = {version = "^0.9.0", optional = true}
|
|
javelin-sdk = {version = "^0.1.8", optional = true}
|
|
hologres-vector = {version = "^0.0.6", optional = true}
|
|
praw = {version = "^7.7.1", optional = true}
|
|
msal = {version = "^1.25.0", optional = true}
|
|
databricks-vectorsearch = {version = "^0.21", optional = true}
|
|
cloudpickle = {version = ">=2.0.0", optional = true}
|
|
dgml-utils = {version = "^0.3.0", optional = true}
|
|
datasets = {version = "^2.15.0", optional = true}
|
|
tree-sitter = {version = "^0.20.2", optional = true}
|
|
tree-sitter-languages = {version = "^1.8.0", optional = true}
|
|
azure-ai-documentintelligence = {version = "^1.0.0b1", optional = true}
|
|
oracle-ads = {version = "^2.9.1", optional = true}
|
|
httpx = {version = "^0.24.1", optional = true}
|
|
elasticsearch = {version = "^8.12.0", optional = true}
|
|
hdbcli = {version = "^2.19.21", optional = true}
|
|
oci = {version = "^2.119.1", optional = true}
|
|
rdflib = {version = "7.0.0", optional = true}
|
|
nvidia-riva-client = {version = "^2.14.0", optional = true}
|
|
azure-search-documents = {version = "11.4.0", optional = true}
|
|
azure-identity = {version = "^1.15.0", optional = true}
|
|
tidb-vector = {version = ">=0.0.3,<1.0.0", optional = true}
|
|
friendli-client = {version = "^1.2.4", optional = true}
|
|
premai = {version = "^0.3.25", optional = true}
|
|
vdms = {version = "^0.0.20", optional = true}
|
|
httpx-sse = {version = "^0.4.0", optional = true}
|
|
pyjwt = {version = "^2.8.0", optional = true}
|
|
oracledb = {version = "^2.2.0", optional = true}
|
|
|
|
[tool.poetry.group.test]
|
|
optional = true
|
|
|
|
[tool.poetry.group.test.dependencies]
|
|
# The only dependencies that should be added are
|
|
# dependencies used for running tests (e.g., pytest, freezegun, responses).
|
|
# Any dependencies that do not meet that criteria will be removed.
|
|
pytest = "^7.3.0"
|
|
pytest-cov = "^4.1.0"
|
|
pytest-dotenv = "^0.5.2"
|
|
duckdb-engine = "^0.9.2"
|
|
pytest-watcher = "^0.2.6"
|
|
freezegun = "^1.2.2"
|
|
responses = "^0.22.0"
|
|
pytest-asyncio = "^0.20.3"
|
|
lark = "^1.1.5"
|
|
pandas = "^2.0.0"
|
|
pytest-mock = "^3.10.0"
|
|
pytest-socket = "^0.6.0"
|
|
syrupy = "^4.0.2"
|
|
requests-mock = "^1.11.0"
|
|
langchain-core = {path = "../core", develop = true}
|
|
|
|
[tool.poetry.group.codespell]
|
|
optional = true
|
|
|
|
[tool.poetry.group.codespell.dependencies]
|
|
codespell = "^2.2.0"
|
|
|
|
[tool.poetry.group.test_integration]
|
|
optional = true
|
|
|
|
[tool.poetry.group.test_integration.dependencies]
|
|
# Do not add dependencies in the test_integration group
|
|
# Instead:
|
|
# 1. Add an optional dependency to the main group
|
|
# poetry add --optional [package name]
|
|
# 2. Add the package name to the extended_testing extra (find it below)
|
|
# 3. Relock the poetry file
|
|
# poetry lock --no-update
|
|
# 4. Favor unit tests not integration tests.
|
|
# Use the @pytest.mark.requires(pkg_name) decorator in unit_tests.
|
|
# Your tests should not rely on network access, as it prevents other
|
|
# developers from being able to easily run them.
|
|
# Instead write unit tests that use the `responses` library or mock.patch with
|
|
# fixtures. Keep the fixtures minimal.
|
|
# See Contributing Guide for more instructions on working with optional dependencies.
|
|
# https://python.langchain.com/docs/contributing/code#working-with-optional-dependencies
|
|
pytest-vcr = "^1.0.2"
|
|
wrapt = "^1.15.0"
|
|
openai = "^1"
|
|
python-dotenv = "^1.0.0"
|
|
cassio = "^0.1.6"
|
|
tiktoken = ">=0.3.2,<0.6.0"
|
|
anthropic = "^0.3.11"
|
|
langchain-core = { path = "../core", develop = true }
|
|
fireworks-ai = "^0.9.0"
|
|
vdms = "^0.0.20"
|
|
exllamav2 = "^0.0.18"
|
|
|
|
[tool.poetry.group.lint]
|
|
optional = true
|
|
|
|
[tool.poetry.group.lint.dependencies]
|
|
ruff = "^0.1.5"
|
|
|
|
# Dependencies for static type checking: mypy, type-stub packages, and
# path-based development installs of sibling packages.
# NOTE(review): no `[tool.poetry.group.typing]` table with `optional = true`
# is visible here, unlike the other dependency groups in this file -- confirm
# whether that is intentional.
[tool.poetry.group.typing.dependencies]
|
|
mypy = "^0.991"
|
|
types-pyyaml = "^6.0.12.2"
|
|
types-requests = "^2.28.11.5"
|
|
types-toml = "^0.10.8.1"
|
|
types-pytz = "^2023.3.0.0"
|
|
types-chardet = "^5.0.4.6"
|
|
types-redis = "^4.3.21.6"
|
|
mypy-protobuf = "^3.0.0"
|
|
langchain-core = {path = "../core", develop = true}
|
|
langchain-text-splitters = {path = "../text-splitters", develop = true}
|
|
|
|
[tool.poetry.group.dev]
|
|
optional = true
|
|
|
|
[tool.poetry.group.dev.dependencies]
|
|
jupyter = "^1.0.0"
|
|
setuptools = "^67.6.1"
|
|
langchain-core = {path = "../core", develop = true}
|
|
|
|
[tool.poetry.extras]
|
|
cli = ["typer"]
|
|
|
|
# Extra that pulls in the optional packages needed for extended testing.
# Keep one package per line (with a trailing comma) so that new packages can
# be added without merge conflicts. Each package should appear only once.
extended_testing = [
    "aleph-alpha-client",
    "aiosqlite",
    "assemblyai",
    "beautifulsoup4",
    "bibtexparser",
    "cassio",
    "chardet",
    "datasets",
    "google-cloud-documentai",
    "esprima",
    "jq",
    "pdfminer-six",
    "pgvector",
    "pypdf",
    "pymupdf",
    "pypdfium2",
    "tqdm",
    "lxml",
    "atlassian-python-api",
    "mwparserfromhell",
    "mwxml",
    "msal",
    "pandas",
    "telethon",
    "psychicapi",
    "gql",
    "gradientai",
    "requests-toolbelt",
    "html2text",
    "numexpr",
    "py-trello",
    "scikit-learn",
    "streamlit",
    "pyspark",
    "openai",
    "sympy",
    "rapidfuzz",
    "jsonschema",
    "rank-bm25",
    "geopandas",
    "jinja2",
    "gitpython",
    "newspaper3k",
    "nvidia-riva-client",
    "feedparser",
    "xata",
    "xmltodict",
    "faiss-cpu",
    "openapi-pydantic",
    "markdownify",
    "arxiv",
    "sqlite-vss",
    "rapidocr-onnxruntime",
    "motor",
    "timescale-vector",
    "anthropic",
    "upstash-redis",
    "rspace_client",
    "fireworks-ai",
    "javelin-sdk",
    "hologres-vector",
    "praw",
    "databricks-vectorsearch",
    "cloudpickle",
    "dgml-utils",
    "cohere",
    "tree-sitter",
    "tree-sitter-languages",
    "azure-ai-documentintelligence",
    "oracle-ads",
    "httpx",
    "elasticsearch",
    "hdbcli",
    "oci",
    "rdflib",
    "azure-search-documents",
    "azure-identity",
    "tidb-vector",
    "friendli-client",
    "premai",
    "vdms",
    "httpx-sse",
    "pyjwt",
    "oracledb",
]
|
|
|
|
[tool.ruff]
|
|
exclude = [
|
|
"tests/examples/non-utf8-encoding.py",
|
|
"tests/integration_tests/examples/non-utf8-encoding.py",
|
|
]
|
|
|
|
[tool.ruff.lint]
|
|
select = [
|
|
"E", # pycodestyle
|
|
"F", # pyflakes
|
|
"I", # isort
|
|
"T201", # print
|
|
]
|
|
|
|
[tool.mypy]
# Boolean options are written as native TOML booleans, not quoted strings.
ignore_missing_imports = true
disallow_untyped_defs = true
# Directories left out of type checking.
exclude = ["notebooks", "examples", "example_data"]
|
|
|
|
[tool.coverage.run]
|
|
omit = [
|
|
"tests/*",
|
|
]
|
|
|
|
[build-system]
|
|
requires = ["poetry-core>=1.0.0"]
|
|
build-backend = "poetry.core.masonry.api"
|
|
|
|
[tool.pytest.ini_options]
|
|
# --strict-markers will raise errors on unknown marks.
|
|
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
|
|
#
|
|
# https://docs.pytest.org/en/7.1.x/reference/reference.html
|
|
# --strict-config any warnings encountered while parsing the `pytest`
|
|
# section of the configuration file raise errors.
|
|
#
|
|
# https://github.com/tophat/syrupy
|
|
# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.
|
|
addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused -vv"
|
|
# Registering custom markers.
|
|
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
|
|
markers = [
|
|
"requires: mark tests as requiring a specific library",
|
|
"scheduled: mark tests to run in scheduled testing",
|
|
"compile: mark placeholder test used to compile integration tests without running them"
|
|
]
|
|
asyncio_mode = "auto"
|
|
|
|
[tool.codespell]
# Comma-separated paths and glob patterns passed to codespell's `skip` option.
skip = '.git,*.pdf,*.svg,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples,*.trig'
# Ignore Latin etc.
ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
# Notes on some of the ignored words:
#   whats    - a typo, but used frequently in queries, so kept as is
#   aapply   - async apply
#   unsecure - a typo, but part of an API; decided not to bother for now
ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin'
|