# langchain/libs/experimental/pyproject.toml
[tool.poetry]
name = "langchain-experimental"
version = "0.0.53"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"
[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain-core = "^0.1.27"
langchain = "^0.1.8"
presidio-anonymizer = {version = "^2.2.352", optional = true}
presidio-analyzer = {version = "^2.2.352", optional = true}
faker = {version = "^19.3.1", optional = true}
vowpal-wabbit-next = {version = "0.6.0", optional = true}
sentence-transformers = {version = "^2", optional = true}
jinja2 = {version = "^3", optional = true}
pandas = {version = "^2.0.1", optional = true}
tabulate = {version = "^0.9.0", optional = true}
[tool.poetry.group.lint]
optional = true
[tool.poetry.group.lint.dependencies]
ruff = "^0.1.5"
[tool.poetry.group.typing]
optional = true
[tool.poetry.group.typing.dependencies]
mypy = "^0.991"
types-pyyaml = "^6.0.12.2"
types-requests = "^2.28.11.5"
langchain = {path = "../langchain", develop = true}
langchain-core = {path = "../core", develop = true}
langchain-community = {path = "../community", develop = true}
[tool.poetry.group.dev]
optional = true
[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
setuptools = "^67.6.1"
langchain = {path = "../langchain", develop = true}
langchain-core = {path = "../core", develop = true}
langchain-community = {path = "../community", develop = true}
[tool.poetry.group.test]
optional = true
[tool.poetry.group.test.dependencies]
# The only dependencies that should be added are
# dependencies used for running tests (e.g., pytest, freezegun, responses).
# Any dependencies that do not meet that criterion will be removed.
pytest = "^7.3.0"
pytest-asyncio = "^0.20.3"
langchain = {path = "../langchain", develop = true}
langchain-core = {path = "../core", develop = true}
langchain-community = {path = "../community", develop = true}
[tool.poetry.group.test_integration]
optional = true
[tool.poetry.group.test_integration.dependencies]
langchain = {path = "../langchain", develop = true}
langchain-core = {path = "../core", develop = true}
langchain-community = {path = "../community", develop = true}
# An extra for the optional dependencies required to run the extended tests
# (illustrative install commands follow the list below).
# Please keep each package on its own line to make it easier to add new
# packages without merge conflicts.
[tool.poetry.extras]
extended_testing = [
"presidio-anonymizer",
"presidio-analyzer",
"faker",
"vowpal-wabbit-next",
"sentence-transformers",
"jinja2",
"pandas",
"tabulate",
]
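# Illustrative install commands for the extra above (a sketch, not prescriptive):
#   pip install "langchain-experimental[extended_testing]"
#   poetry install --with test --extras extended_testing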
[tool.ruff.lint]
select = [
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"T201", # print
]
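# Illustrative usage: the rule selection above is applied when running, e.g.,
# `ruff check .` from this package directory (normally invoked via `make lint`).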
[tool.mypy]
ignore_missing_imports = "True"
disallow_untyped_defs = "True"
exclude = ["notebooks", "examples", "example_data"]
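# Illustrative usage: mypy picks up this section automatically when run from this
# directory, e.g. `mypy langchain_experimental` (package directory name assumed).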
[tool.coverage.run]
omit = [
"tests/*",
]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"
[tool.pytest.ini_options]
# --strict-markers will raise errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# https://docs.pytest.org/en/7.1.x/reference/reference.html
# --strict-config raises errors on any warnings encountered while parsing the
# `pytest` section of the configuration file.
#
# https://github.com/tophat/syrupy
# --snapshot-warn-unused prints a warning on unused snapshots rather than failing the test suite.
addopts = "--strict-markers --strict-config --durations=5"
# Registering custom markers.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
"requires: mark tests as requiring a specific library",
"asyncio: mark tests as requiring asyncio",
"compile: mark placeholder test used to compile integration tests without running them",
]
asyncio_mode = "auto"
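# Illustrative example of the `requires` marker registered above (hypothetical test name):
#   @pytest.mark.requires("faker")
#   def test_uses_optional_dependency() -> None:
#       ...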