[tool.poetry]
name = "langchain-community"
version = "0.0.20"
description = "Community contributed LangChain integrations."
authors = []
license = "MIT"
readme = "README.md"
repository = "https://github.com/langchain-ai/langchain"

[tool.poetry.dependencies]
python = ">=3.8.1,<4.0"
langchain-core = ">=0.1.21,<0.2"
SQLAlchemy = ">=1.4,<3"
requests = "^2"
PyYAML = ">=5.3"
numpy = "^1"
aiohttp = "^3.8.3"
tenacity = "^8.1.0"
dataclasses-json = ">= 0.5.7, < 0.7"
langsmith = ">=0.0.83,<0.1"
tqdm = {version = ">=4.48.0", optional = true}
openapi-pydantic = {version = "^0.3.2", optional = true}
faiss-cpu = {version = "^1", optional = true}
beautifulsoup4 = {version = "^4", optional = true}
jinja2 = {version = "^3", optional = true}
cohere = {version = "^4", optional = true}
openai = {version = "<2", optional = true}
arxiv = {version = "^1.4", optional = true}
pypdf = {version = "^3.4.0", optional = true}
aleph-alpha-client = {version = "^2.15.0", optional = true}
gradientai = {version = "^1.4.0", optional = true}
pgvector = {version = "^0.1.6", optional = true}
atlassian-python-api = {version = "^3.36.0", optional = true}
html2text = {version = "^2020.1.16", optional = true}
numexpr = {version = "^2.8.6", optional = true}
jq = {version = "^1.4.1", optional = true}
pdfminer-six = {version = "^20221105", optional = true}
lxml = {version = "^4.9.2", optional = true}
pymupdf = {version = "^1.22.3", optional = true}
rapidocr-onnxruntime = {version = "^1.3.2", optional = true, python = ">=3.8.1,<3.12"}
pypdfium2 = {version = "^4.10.0", optional = true}
gql = {version = "^3.4.1", optional = true}
pandas = {version = "^2.0.1", optional = true}
telethon = {version = "^1.28.5", optional = true}
chardet = {version = "^5.1.0", optional = true}
requests-toolbelt = {version = "^1.0.0", optional = true}
scikit-learn = {version = "^1.2.2", optional = true}
py-trello = {version = "^0.19.0", optional = true}
bibtexparser = {version = "^1.4.0", optional = true}
pyspark = {version = "^3.4.0", optional = true}
mwparserfromhell = {version = "^0.6.4", optional = true}
mwxml = {version = "^0.3.3", optional = true}
esprima = {version = "^4.0.1", optional = true}
streamlit = {version = "^1.18.0", optional = true, python = ">=3.8.1,<3.9.7 || >3.9.7,<4.0"}
psychicapi = {version = "^0.8.0", optional = true}
cassio = {version = "^0.1.0", optional = true}
sympy = {version = "^1.12", optional = true}
rapidfuzz = {version = "^3.1.1", optional = true}
jsonschema = {version = ">1", optional = true}
rank-bm25 = {version = "^0.2.2", optional = true}
geopandas = {version = "^0.13.1", optional = true}
gitpython = {version = "^3.1.32", optional = true}
feedparser = {version = "^6.0.10", optional = true}
newspaper3k = {version = "^0.2.8", optional = true}
xata = {version = "^1.0.0a7", optional = true}
xmltodict = {version = "^0.13.0", optional = true}
markdownify = {version = "^0.11.6", optional = true}
assemblyai = {version = "^0.17.0", optional = true}
sqlite-vss = {version = "^0.1.2", optional = true}
motor = {version = "^3.3.1", optional = true}
timescale-vector = {version = "^0.0.1", optional = true}
typer = {version = "^0.9.0", optional = true}
anthropic = {version = "^0.3.11", optional = true}
aiosqlite = {version = "^0.19.0", optional = true}
rspace_client = {version = "^2.5.0", optional = true}
upstash-redis = {version = "^0.15.0", optional = true}
google-cloud-documentai = {version = "^2.20.1", optional = true}
fireworks-ai = {version = "^0.9.0", optional = true}
javelin-sdk = {version = "^0.1.8", optional = true}
hologres-vector = {version = "^0.0.6", optional = true}
praw = {version = "^7.7.1", optional = true}
msal = {version = "^1.25.0", optional = true}
databricks-vectorsearch = {version = "^0.21", optional = true}
dgml-utils = {version = "^0.3.0", optional = true}
datasets = {version = "^2.15.0", optional = true}
tree-sitter = {version = "^0.20.2", optional = true}
tree-sitter-languages = {version = "^1.8.0", optional = true}
azure-ai-documentintelligence = {version = "^1.0.0b1", optional = true}
oracle-ads = {version = "^2.9.1", optional = true}
zhipuai = {version = "^1.0.7", optional = true}
httpx = {version = "^0.24.1", optional = true}
elasticsearch = {version = "^8.12.0", optional = true}
hdbcli = {version = "^2.19.21", optional = true}
oci = {version = "^2.119.1", optional = true}
rdflib = {version = "7.0.0", optional = true}
nvidia-riva-client = {version = "^2.14.0", optional = true}

[tool.poetry.group.test]
optional = true

[tool.poetry.group.test.dependencies]
# The only dependencies that should be added are
# dependencies used for running tests (e.g., pytest, freezegun, responses).
# Any dependencies that do not meet those criteria will be removed.
pytest = "^7.3.0"
pytest-cov = "^4.1.0"
pytest-dotenv = "^0.5.2"
duckdb-engine = "^0.9.2"
pytest-watcher = "^0.2.6"
freezegun = "^1.2.2"
responses = "^0.22.0"
pytest-asyncio = "^0.20.3"
lark = "^1.1.5"
pandas = "^2.0.0"
pytest-mock = "^3.10.0"
pytest-socket = "^0.6.0"
syrupy = "^4.0.2"
requests-mock = "^1.11.0"
langchain-core = {path = "../core", develop = true}

[tool.poetry.group.codespell]
optional = true

[tool.poetry.group.codespell.dependencies]
codespell = "^2.2.0"

[tool.poetry.group.test_integration]
optional = true

[tool.poetry.group.test_integration.dependencies]
# Do not add dependencies to the test_integration group.
# Instead:
# 1. Add an optional dependency to the main group
#    poetry add --optional [package name]
# 2. Add the package name to the extended_testing extra (find it below)
# 3. Relock the poetry file
#    poetry lock --no-update
# 4. Favor unit tests over integration tests.
#    Use the @pytest.mark.requires(pkg_name) decorator in unit_tests
#    (a sketch follows below).
#    Your tests should not rely on network access, as that prevents other
#    developers from easily running them.
#    Instead, write unit tests that use the `responses` library or mock.patch with
#    fixtures. Keep the fixtures minimal.
# See the Contributing Guide for more instructions on working with optional dependencies:
# https://python.langchain.com/docs/contributing/code#working-with-optional-dependencies
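#
# A minimal unit-test sketch following the guidance above (illustrative only;
# `my_pkg` and the URL are placeholders, and the `requires` marker is the custom
# marker registered under [tool.pytest.ini_options] further down):
#
#   import pytest
#   import responses
#
#   @pytest.mark.requires("my_pkg")
#   @responses.activate
#   def test_loader_parses_payload() -> None:
#       # Stub the HTTP call so the test never touches the network.
#       responses.add(responses.GET, "https://example.com/api", json={"ok": True}, status=200)
#       ...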
pytest-vcr = "^1.0.2"
wrapt = "^1.15.0"
openai = "^1"
python-dotenv = "^1.0.0"
cassio = "^0.1.0"
tiktoken = ">=0.3.2,<0.6.0"
anthropic = "^0.3.11"
langchain-core = {path = "../core", develop = true}
fireworks-ai = "^0.9.0"

[tool.poetry.group.lint]
optional = true

[tool.poetry.group.lint.dependencies]
ruff = "^0.1.5"

[tool.poetry.group.typing.dependencies]
mypy = "^0.991"
types-pyyaml = "^6.0.12.2"
types-requests = "^2.28.11.5"
types-toml = "^0.10.8.1"
types-pytz = "^2023.3.0.0"
types-chardet = "^5.0.4.6"
types-redis = "^4.3.21.6"
mypy-protobuf = "^3.0.0"
langchain-core = {path = "../core", develop = true}

[tool.poetry.group.dev]
optional = true

[tool.poetry.group.dev.dependencies]
jupyter = "^1.0.0"
setuptools = "^67.6.1"
langchain-core = {path = "../core", develop = true}

[tool.poetry.extras]
cli = ["typer"]

# An extra for enabling extended testing.
# Please keep one package per line to make it easier to add new packages
# without merge conflicts.
extended_testing = [
    "aleph-alpha-client",
    "aiosqlite",
    "assemblyai",
    "beautifulsoup4",
    "bibtexparser",
    "cassio",
    "chardet",
    "datasets",
    "google-cloud-documentai",
    "esprima",
    "jq",
    "pdfminer-six",
    "pgvector",
    "pypdf",
    "pymupdf",
    "pypdfium2",
    "tqdm",
    "lxml",
    "atlassian-python-api",
    "mwparserfromhell",
    "mwxml",
    "msal",
    "pandas",
    "telethon",
    "psychicapi",
    "gql",
    "gradientai",
    "requests-toolbelt",
    "html2text",
    "numexpr",
    "py-trello",
    "scikit-learn",
    "streamlit",
    "pyspark",
    "openai",
    "sympy",
    "rapidfuzz",
    "jsonschema",
    "rank-bm25",
    "geopandas",
    "jinja2",
    "gitpython",
    "newspaper3k",
    "nvidia-riva-client",
    "feedparser",
    "xata",
    "xmltodict",
    "faiss-cpu",
    "openapi-pydantic",
    "markdownify",
    "arxiv",
    "sqlite-vss",
    "rapidocr-onnxruntime",
    "motor",
    "timescale-vector",
    "anthropic",
    "upstash-redis",
    "rspace_client",
    "fireworks-ai",
    "javelin-sdk",
    "hologres-vector",
    "praw",
    "databricks-vectorsearch",
    "dgml-utils",
    "cohere",
    "tree-sitter",
    "tree-sitter-languages",
    "azure-ai-documentintelligence",
    "oracle-ads",
    "zhipuai",
    "httpx",
    "elasticsearch",
    "hdbcli",
    "oci",
    "rdflib"
]
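# Usage sketch (commands are illustrative, not part of the original file): an extra
# simply turns on the matching optional dependencies declared above, e.g.
#   poetry install --extras extended_testing
#   pip install "langchain-community[cli]"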
[tool.ruff]
exclude = [
    "tests/examples/non-utf8-encoding.py",
    "tests/integration_tests/examples/non-utf8-encoding.py",
]

[tool.ruff.lint]
select = [
    "E",    # pycodestyle
    "F",    # pyflakes
    "I",    # isort
    "T201", # print
]
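# Illustrative: with this selection, `ruff check .` (run from the package root) reports
# pycodestyle (E) and pyflakes (F) errors, unsorted imports (I), and stray `print()` calls (T201).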
[tool.mypy]
ignore_missing_imports = "True"
disallow_untyped_defs = "True"
exclude = ["notebooks", "examples", "example_data"]

[tool.coverage.run]
omit = [
    "tests/*",
]

[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

[tool.pytest.ini_options]
# --strict-markers will raise errors on unknown marks.
# https://docs.pytest.org/en/7.1.x/how-to/mark.html#raising-errors-on-unknown-marks
#
# --strict-config: any warnings encountered while parsing the `pytest`
# section of the configuration file raise errors.
# https://docs.pytest.org/en/7.1.x/reference/reference.html
#
# --snapshot-warn-unused: prints a warning on unused snapshots rather than failing the test suite.
# https://github.com/tophat/syrupy
addopts = "--strict-markers --strict-config --durations=5 --snapshot-warn-unused -vv"

# Registering custom markers.
# https://docs.pytest.org/en/7.1.x/example/markers.html#registering-markers
markers = [
    "requires: mark tests as requiring a specific library",
    "scheduled: mark tests to run in scheduled testing",
    "compile: mark placeholder test used to compile integration tests without running them"
]
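# Illustrative marker selection (exact CI invocations are not part of this file):
#   poetry run pytest -m compile tests/integration_tests    # run only the `compile` placeholder tests
#   poetry run pytest -m "not scheduled" tests/unit_tests    # deselect tests marked `scheduled`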
asyncio_mode = "auto"

[tool.codespell]
skip = '.git,*.pdf,*.svg,*.pdf,*.yaml,*.ipynb,poetry.lock,*.min.js,*.css,package-lock.json,example_data,_dist,examples,*.trig'
# Ignore Latin etc.
ignore-regex = '.*(Stati Uniti|Tense=Pres).*'
# 'whats' is a typo but is used frequently in queries, so it is kept as-is
# aapply - async apply
# unsecure - a typo, but part of an API; decided not to bother for now
ignore-words-list = 'momento,collison,ned,foor,reworkd,parth,whats,aapply,mysogyny,unsecure,damon,crate,aadd,symbl,precesses,accademia,nin'