langchain/libs/experimental/pyproject.toml

[tool.poetry]
# Package identity.
name = "langchain-experimental"
version = "0.0.29"
description = "Building applications with LLMs through composability"

# Licensing and the file used as the long description.
license = "MIT"
readme = "README.md"

# Project links and maintainers (no individual authors are listed).
repository = "https://github.com/langchain-ai/langchain"
authors = []


[tool.poetry.dependencies]
# Always-installed requirements.
python = ">=3.8.1,<4.0"
langchain = ">=0.0.308"

# Optional packages: each is flagged `optional = true`, so it is only
# installed when an extra that lists it is requested.
presidio-anonymizer = { version = "^2.2.33", optional = true }
presidio-analyzer = { version = "^2.2.33", optional = true }
faker = { version = "^19.3.1", optional = true }
vowpal-wabbit-next = { version = "0.6.0", optional = true }
sentence-transformers = { version = "^2", optional = true }


# Linter (ruff) and formatter (black) used by the `lint` dependency group.
[tool.poetry.group.lint.dependencies]
# A caret constraint on a 0.0.x version admits only >=0.0.249,<0.0.250.
ruff = "^0.0.249"
black = "^23.1.0"

# Type checker and stub packages used by the `typing` dependency group.
[tool.poetry.group.typing.dependencies]
# A caret constraint on a 0.x version admits only >=0.991,<0.992.
mypy = "^0.991"
types-pyyaml = "^6.0.12.2"
types-requests = "^2.28.11.5"

# Tools for the `dev` dependency group (local development only).
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
setuptools = "^67.6.1"

[tool.poetry.group.test.dependencies]
# Only add dependencies that are needed to run the tests
# (e.g., pytest, freezegun, responses).
# Any dependency that does not meet this criterion will be removed.
pytest = "^7.3.0"

# Extra that groups the optional packages needed for extended testing.
# Keep one package per line so that adding a package does not cause
# merge conflicts.
[tool.poetry.extras]
extended_testing = [
    "presidio-anonymizer",
    "presidio-analyzer",
    "faker",
    "vowpal-wabbit-next",
    "sentence-transformers",
]

[tool.ruff]
# Enabled rule families, selected by code prefix.
select = [
    # pycodestyle
    "E",
    # pyflakes
    "F",
    # isort
    "I",
]

[tool.mypy]
# Boolean options are written as native lowercase TOML booleans, the form
# mypy documents for pyproject.toml.

# Do not report errors for imports that cannot be resolved.
ignore_missing_imports = true
# Require type annotations on every function definition.
disallow_untyped_defs = true
# Path patterns that mypy skips when checking.
exclude = ["notebooks", "examples", "example_data"]

[tool.coverage.run]
# Leave the test suite itself out of coverage measurement.
omit = ["tests/*"]

# Build backend declaration: builds are delegated to poetry-core.
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
# --strict-markers raises errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# --strict-config turns any warning encountered while parsing the `pytest`
# section of the configuration file into an error.
# https://docs.pytest.org/en/7.1.x/reference/reference.html
#
# --durations=5 reports the 5 slowest setup/test durations after the run.
#
# NOTE(review): --snapshot-warn-unused (syrupy, https://github.com/tophat/syrupy)
# prints a warning on unused snapshots rather than failing the test suite, but
# it is not part of addopts below. Confirm whether the flag should be added or
# this note dropped.
addopts = "--strict-markers --strict-config --durations=5"
# Register custom markers so that --strict-markers accepts them.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
  "requires: mark tests as requiring a specific library",
  "asyncio: mark tests as requiring asyncio"
]
Harrison/move experimental (#8084) 2023-07-21 17:36:28 +00:00			`[tool.poetry]`
remove CVEs (#8092) This PR aims to move all code with CVEs into `langchain.experimental`. Note that we are NOT yet removing from the core `langchain` package - we will give people a week to migrate here. See MIGRATE.md for how to migrate Zero changes to functionality Vulnerabilities this addresses: PALChain: - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5752409 - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5759265 SQLDatabaseChain - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5759268 `load_prompt` (Python files only) - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5725807 2023-07-21 20:32:39 +00:00			`name = "langchain-experimental"`
bump 313 (#11718) 2023-10-12 16:48:54 +00:00			`version = "0.0.29"`
Harrison/move experimental (#8084) 2023-07-21 17:36:28 +00:00			`description = "Building applications with LLMs through composability"`
			`authors = []`
			`license = "MIT"`
			`readme = "README.md"`
Update repository links in the package metadata. (#9454) 2023-08-18 16:55:43 +00:00			`repository = "https://github.com/langchain-ai/langchain"`
Harrison/move experimental (#8084) 2023-07-21 17:36:28 +00:00

			`[tool.poetry.dependencies]`
			`python = ">=3.8.1,<4.0"`
Tweak type hints to match dependency's behavior. (#11355) Needs #11353 to merge first, and a new `langchain` to be published with those changes. 2023-10-05 02:36:58 +00:00			`langchain = ">=0.0.308"`
Add data anonymizer (#9863) ### Description The feature for anonymizing data has been implemented. In order to protect private data, such as when querying external APIs (OpenAI), it is worth pseudonymizing sensitive data to maintain full privacy. Anonynization consists of two steps: 1. Identification: Identify all data fields that contain personally identifiable information (PII). 2. Replacement: Replace all PIIs with pseudo values or codes that do not reveal any personal information about the individual but can be used for reference. We're not using regular encryption, because the language model won't be able to understand the meaning or context of the encrypted data. We use Microsoft Presidio together with Faker framework for anonymization purposes because of the wide range of functionalities they provide. The full implementation is available in `PresidioAnonymizer`. ### Future works - deanonymization - add the ability to reverse anonymization. For example, the workflow could look like this: `anonymize -> LLMChain -> deanonymize`. By doing this, we will retain anonymity in requests to, for example, OpenAI, and then be able restore the original data. - instance anonymization - at this point, each occurrence of PII is treated as a separate entity and separately anonymized. Therefore, two occurrences of the name John Doe in the text will be changed to two different names. It is therefore worth introducing support for full instance detection, so that repeated occurrences are treated as a single object. ### Twitter handle @deepsense_ai / @MaksOpp --------- Co-authored-by: MaksOpp <maks.operlejn@gmail.com> Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-08-30 17:39:44 +00:00			`presidio-anonymizer = {version = "^2.2.33", optional = true}`
			`presidio-analyzer = {version = "^2.2.33", optional = true}`
			`faker = {version = "^19.3.1", optional = true}`
move everything into experimental 2023-09-11 16:16:08 +00:00			`vowpal-wabbit-next = {version = "0.6.0", optional = true}`
			`sentence-transformers = {version = "^2", optional = true}`
Harrison/move experimental (#8084) 2023-07-21 17:36:28 +00:00

			`[tool.poetry.group.lint.dependencies]`
			`ruff = "^0.0.249"`
			`black = "^23.1.0"`

			`[tool.poetry.group.typing.dependencies]`
			`mypy = "^0.991"`
remove CVEs (#8092) This PR aims to move all code with CVEs into `langchain.experimental`. Note that we are NOT yet removing from the core `langchain` package - we will give people a week to migrate here. See MIGRATE.md for how to migrate Zero changes to functionality Vulnerabilities this addresses: PALChain: - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5752409 - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5759265 SQLDatabaseChain - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5759268 `load_prompt` (Python files only) - https://security.snyk.io/vuln/SNYK-PYTHON-LANGCHAIN-5725807 2023-07-21 20:32:39 +00:00			`types-pyyaml = "^6.0.12.2"`
Diffbot Graph Transformer / Neo4j Graph document ingestion (#9979) Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-09-06 20:32:59 +00:00			`types-requests = "^2.28.11.5"`
Harrison/move experimental (#8084) 2023-07-21 17:36:28 +00:00
			`[tool.poetry.group.dev.dependencies]`
			`jupyter = "^1.0.0"`
			`setuptools = "^67.6.1"`

			`[tool.poetry.group.test.dependencies]`
			`# The only dependencies that should be added are`
			`# dependencies used for running tests (e.g., pytest, freezegun, response).`
			`# Any dependencies that do not meet that criteria will be removed.`
			`pytest = "^7.3.0"`

Add data anonymizer (#9863) ### Description The feature for anonymizing data has been implemented. In order to protect private data, such as when querying external APIs (OpenAI), it is worth pseudonymizing sensitive data to maintain full privacy. Anonynization consists of two steps: 1. Identification: Identify all data fields that contain personally identifiable information (PII). 2. Replacement: Replace all PIIs with pseudo values or codes that do not reveal any personal information about the individual but can be used for reference. We're not using regular encryption, because the language model won't be able to understand the meaning or context of the encrypted data. We use Microsoft Presidio together with Faker framework for anonymization purposes because of the wide range of functionalities they provide. The full implementation is available in `PresidioAnonymizer`. ### Future works - deanonymization - add the ability to reverse anonymization. For example, the workflow could look like this: `anonymize -> LLMChain -> deanonymize`. By doing this, we will retain anonymity in requests to, for example, OpenAI, and then be able restore the original data. - instance anonymization - at this point, each occurrence of PII is treated as a separate entity and separately anonymized. Therefore, two occurrences of the name John Doe in the text will be changed to two different names. It is therefore worth introducing support for full instance detection, so that repeated occurrences are treated as a single object. ### Twitter handle @deepsense_ai / @MaksOpp --------- Co-authored-by: MaksOpp <maks.operlejn@gmail.com> Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-08-30 17:39:44 +00:00			`# An extra used to be able to add extended testing.`
			`# Please use new-line on formatting to make it easier to add new packages without`
			`# merge-conflicts`
			`[tool.poetry.extras]`
			`extended_testing = [`
			`"presidio-anonymizer",`
			`"presidio-analyzer",`
			`"faker",`
move everything into experimental 2023-09-11 16:16:08 +00:00			`"vowpal-wabbit-next",`
			`"sentence-transformers",`
Add data anonymizer (#9863) ### Description The feature for anonymizing data has been implemented. In order to protect private data, such as when querying external APIs (OpenAI), it is worth pseudonymizing sensitive data to maintain full privacy. Anonynization consists of two steps: 1. Identification: Identify all data fields that contain personally identifiable information (PII). 2. Replacement: Replace all PIIs with pseudo values or codes that do not reveal any personal information about the individual but can be used for reference. We're not using regular encryption, because the language model won't be able to understand the meaning or context of the encrypted data. We use Microsoft Presidio together with Faker framework for anonymization purposes because of the wide range of functionalities they provide. The full implementation is available in `PresidioAnonymizer`. ### Future works - deanonymization - add the ability to reverse anonymization. For example, the workflow could look like this: `anonymize -> LLMChain -> deanonymize`. By doing this, we will retain anonymity in requests to, for example, OpenAI, and then be able restore the original data. - instance anonymization - at this point, each occurrence of PII is treated as a separate entity and separately anonymized. Therefore, two occurrences of the name John Doe in the text will be changed to two different names. It is therefore worth introducing support for full instance detection, so that repeated occurrences are treated as a single object. ### Twitter handle @deepsense_ai / @MaksOpp --------- Co-authored-by: MaksOpp <maks.operlejn@gmail.com> Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-08-30 17:39:44 +00:00			`]`

Harrison/move experimental (#8084) 2023-07-21 17:36:28 +00:00			`[tool.ruff]`
			`select = [`
			`"E", # pycodestyle`
			`"F", # pyflakes`
			`"I", # isort`
			`]`

			`[tool.mypy]`
			`ignore_missing_imports = "True"`
			`disallow_untyped_defs = "True"`
			`exclude = ["notebooks", "examples", "example_data"]`

			`[tool.coverage.run]`
			`omit = [`
			`"tests/*",`
			`]`

			`[build-system]`
			`requires = ["poetry-core>=1.0.0"]`
			`build-backend = "poetry.core.masonry.api"`

			`[tool.pytest.ini_options]`
			`# --strict-markers will raise errors on unknown marks.`
			`# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks`
			`#`
			`# https://docs.pytest.org/en/7.1.x/reference/reference.html`
			# --strict-config any warnings encountered while parsing the `pytest`
			`# section of the configuration file raise errors.`
			`#`
			`# https://github.com/tophat/syrupy`
			`# --snapshot-warn-unused Prints a warning on unused snapshots rather than fail the test suite.`
			`addopts = "--strict-markers --strict-config --durations=5"`
			`# Registering custom markers.`
			`# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers`
			`markers = [`
General anthropic functions, steps towards experimental integration tests (#11727) To match change in js here https://github.com/langchain-ai/langchainjs/pull/2892 Some integration tests need a bit more work in experimental: ![Screenshot 2023-10-12 at 12 02 49 PM](https://github.com/langchain-ai/langchain/assets/9557659/262d7d22-c405-40e9-afef-669e8d585307) Pretty sure the sqldatabase ones are an actual regression or change in interface because it's returning a placeholder. --------- Co-authored-by: Bagatur <baskaryan@gmail.com> 2023-10-13 16:48:24 +00:00			`"requires: mark tests as requiring a specific library",`
			`"asyncio: mark tests as requiring asyncio"`
Harrison/move experimental (#8084) 2023-07-21 17:36:28 +00:00			`]`