mirror of
https://github.com/hwchase17/langchain
synced 2024-11-18 09:25:54 +00:00
c28efb878c
- **Description:** The layout of HTML pages can vary depending on the Bootstrap framework or the styles of the pages, so we need a splitter that transforms the HTML tags into a proper layout and then splits the HTML content based on the provided list of tags to determine its HTML sections. We use the BS4 library along with an XSLT structure to split the HTML content using a section-aware approach. - **Dependencies:** No new dependencies - **Twitter handle:** @m_setayesh Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` from the root of the package you've modified to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://python.langchain.com/docs/contributing/ If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in the `docs/docs/integrations` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> --------- Co-authored-by: Bagatur <baskaryan@gmail.com>
106 lines
2.7 KiB
TOML
106 lines
2.7 KiB
TOML
# Package metadata for the text-splitters distribution.
[tool.poetry]
name = "langchain-text-splitters"
version = "0.0.1"
description = "LangChain text splitting utilities"
authors = []
license = "MIT"
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"

# Runtime requirements of the package.
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain-core = "^0.1.28"
# Optional dependencies; both are listed in the `extended_testing` extra.
lxml = { version = ">=4.9.3,<6.0", optional = true }
beautifulsoup4 = { version = "^4.12.3", optional = true }

# Optional dependency group holding the lint tooling.
[tool.poetry.group.lint]
optional = true

[tool.poetry.group.lint.dependencies]
ruff = "^0.1.5"
# Path dependency on ../core, installed in develop (editable) mode.
langchain-core = { path = "../core", develop = true }

# Optional dependency group used for static type checking.
[tool.poetry.group.typing]
optional = true

[tool.poetry.group.typing.dependencies]
mypy = "^1"
lxml-stubs = "^0.5.1"
types-requests = "^2.31.0.20240218"
# NOTE(review): presumably present so mypy can resolve imports of these
# packages during type checking; confirm before removing.
tiktoken = "^0.6.0"
spacy = "^3.7.4"

# Optional dependency group for local development.
[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
# Path dependency on ../core, installed in develop (editable) mode.
langchain-core = { path = "../core", develop = true }

# Optional dependency group for running the unit tests.
[tool.poetry.group.test]
optional = true

[tool.poetry.group.test.dependencies]
# The only dependencies that should be added are
# dependencies used for running tests (e.g., pytest, freezegun, response).
# Any dependencies that do not meet those criteria will be removed.
pytest = "^7.3.0"
freezegun = "^1.2.2"
pytest-mock = "^3.10.0"
pytest-watcher = "^0.3.4"
pytest-asyncio = "^0.21.1"
pytest-profiling = "^1.7.0"
# Path dependency on ../core, installed in develop (editable) mode.
langchain-core = { path = "../core", develop = true }

# Optional dependency group for integration tests; it currently declares
# no dependencies.
[tool.poetry.group.test_integration]
optional = true
dependencies = {}

# Extras bundle the optional entries of [tool.poetry.dependencies].
[tool.poetry.extras]
extended_testing = [
    "lxml",
    "beautifulsoup4",
]

# Rule sets enabled for ruff.
[tool.ruff.lint]
select = [
    "E",    # pycodestyle
    "F",    # pyflakes
    "I",    # isort
    "T201", # print
]

[tool.mypy]
# Native TOML boolean (lowercase) rather than the string "True".
disallow_untyped_defs = true

# Per-module mypy override: missing-import errors are ignored for the
# modules listed below.
[[tool.mypy.overrides]]
module = [
    "transformers",
    "sentence_transformers",
    "nltk.tokenize",
    "konlpy.tag",
    "bs4",
]
# Native TOML boolean (lowercase) rather than the string "True".
ignore_missing_imports = true

# Files matching these patterns are left out of coverage measurement.
[tool.coverage.run]
omit = ["tests/*"]

# PEP 517 build backend declaration.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
# --strict-markers raises errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# --strict-config turns any warnings encountered while parsing the `pytest`
# section of the configuration file into errors.
# https://docs.pytest.org/en/7.1.x/reference/reference.html
#
# --durations=5 reports the five slowest durations.
addopts = "--strict-markers --strict-config --durations=5"
# Registering custom markers (required because of --strict-markers).
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
    "requires: mark tests as requiring a specific library",
    "asyncio: mark tests as requiring asyncio",
    "compile: mark placeholder test used to compile integration tests without running them",
]
# pytest-asyncio mode setting.
asyncio_mode = "auto"