mirror of https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00

Merge branch 'master' into master

This commit is contained in: commit f1072cc31f
4 .github/CONTRIBUTING.md (vendored)
@ -80,10 +80,10 @@ For example, to contribute to `langchain` run `cd libs/langchain` before getting
To install requirements:

```bash
poetry install -E all
poetry install --with test
```

This will install all requirements for running the package, examples, linting, formatting, tests, and coverage. Note the `-E all` flag will install all optional dependencies necessary for integration testing.
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage.

❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.
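A minimal sketch of the workaround described in the note above. The Poetry version check, the `installer.modern-installation` flag, and the install command are all taken from the note and the updated instructions; adjust them to your own environment.

```bash
# The debugpy WheelFileValidationError was fixed in Poetry 1.5.1; check what you are running
poetry --version

# If the error still appears on 1.5.1, disable "modern installation" and reinstall requirements
poetry config installer.modern-installation false
poetry install --with test
```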
48 .github/actions/poetry_setup/action.yml (vendored)
@ -15,19 +15,13 @@ inputs:
|
||||
description: Poetry version
|
||||
required: true
|
||||
|
||||
install-command:
|
||||
description: Command run for installing dependencies
|
||||
required: false
|
||||
default: poetry install
|
||||
|
||||
cache-key:
|
||||
description: Cache key to use for manual handling of caching
|
||||
required: true
|
||||
|
||||
working-directory:
|
||||
description: Directory to run install-command in
|
||||
required: false
|
||||
default: ""
|
||||
description: Directory whose poetry.lock file should be cached
|
||||
required: true
|
||||
|
||||
runs:
|
||||
using: composite
|
||||
@ -38,47 +32,35 @@ runs:
|
||||
python-version: ${{ inputs.python-version }}
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-pip
|
||||
name: Cache Pip ${{ inputs.python-version }}
|
||||
id: cache-bin-poetry
|
||||
name: Cache Poetry binary - Python ${{ inputs.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
key: pip-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}
|
||||
/opt/pipx/venvs/poetry
|
||||
/opt/pipx_bin/poetry
|
||||
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
|
||||
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
|
||||
|
||||
- name: Install poetry
|
||||
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
|
||||
shell: bash
|
||||
env:
|
||||
POETRY_VERSION: ${{ inputs.poetry-version }}
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
run: pipx install "poetry==$POETRY_VERSION" --python "python$PYTHON_VERSION" --verbose
|
||||
|
||||
- name: Check Poetry File
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry check
|
||||
|
||||
- name: Check lock file
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry lock --check
|
||||
|
||||
- uses: actions/cache@v3
|
||||
id: cache-poetry
|
||||
- name: Restore pip and poetry cached dependencies
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
|
||||
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
~/.cache/pypoetry/virtualenvs
|
||||
~/.cache/pypoetry/cache
|
||||
~/.cache/pypoetry/artifacts
|
||||
${{ env.WORKDIR }}/.venv
|
||||
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
|
||||
- run: ${{ inputs.install-command }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
shell: bash
|
||||
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}
|
||||
|
43 .github/workflows/_lint.yml (vendored)
@ -80,31 +80,32 @@ jobs:
|
||||
find "$WORKDIR" -name '*.py' -type f -not -newermt "$OLDEST_COMMIT_TIME" -exec touch -c -m -t '200001010000' '{}' '+'
|
||||
|
||||
echo "oldest-commit=$OLDEST_COMMIT" >> "$GITHUB_OUTPUT"
|
||||
- uses: actions/cache@v3
|
||||
id: cache-pip
|
||||
name: Cache langchain editable pip install - ${{ matrix.python-version }}
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
with:
|
||||
path: |
|
||||
~/.cache/pip
|
||||
key: pip-editable-langchain-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ matrix.python-version }}
|
||||
- name: Install poetry
|
||||
run: |
|
||||
pipx install "poetry==$POETRY_VERSION"
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
uses: actions/setup-python@v4
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
cache: poetry
|
||||
cache-dependency-path: |
|
||||
${{ env.WORKDIR }}/**/poetry.lock
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: lint
|
||||
|
||||
- name: Check Poetry File
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry check
|
||||
|
||||
- name: Check lock file
|
||||
shell: bash
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry lock --check
|
||||
|
||||
- name: Install dependencies
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
run: |
|
||||
poetry install
|
||||
|
||||
- name: Install langchain editable
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
if: ${{ inputs.working-directory != 'libs/langchain' }}
|
||||
@ -115,7 +116,7 @@ jobs:
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
CACHE_BASE: black-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.black_cache
|
||||
@ -127,7 +128,7 @@ jobs:
|
||||
- name: Get .mypy_cache to speed up mypy
|
||||
uses: actions/cache@v3
|
||||
env:
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
|
||||
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
|
||||
with:
|
||||
path: |
|
||||
${{ env.WORKDIR }}/.mypy_cache
|
||||
|
81 .github/workflows/_pydantic_compatibility.yml (vendored, new file)
@ -0,0 +1,81 @@
name: pydantic v1/v2 compatibility

on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"

env:
POETRY_VERSION: "1.5.1"

jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: pydantic-cross-compat

- name: Install dependencies
shell: bash
run: poetry install

- name: Install the opposite major version of pydantic
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
shell: bash
run: |
# Determine the major part of pydantic version
REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)

if [[ "$REGULAR_VERSION" == "1" ]]; then
PYDANTIC_DEP=">=2.1,<3"
TEST_WITH_VERSION="2"
elif [[ "$REGULAR_VERSION" == "2" ]]; then
PYDANTIC_DEP="<2"
TEST_WITH_VERSION="1"
else
echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
exit 1
fi

# Install via `pip` instead of `poetry add` to avoid changing lockfile,
# which would prevent caching from working: the cache would get saved
# to a different key than where it gets loaded from.
poetry run pip install "pydantic${PYDANTIC_DEP}"

# Ensure that the correct pydantic is installed now.
echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"

# Determine the major part of pydantic version
CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)

# Check that the major part of pydantic version is as expected, if not
# raise an error
if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
exit 1
fi
echo "Found pydantic version ${CURRENT_VERSION}, as expected"
- name: Run pydantic compatibility tests
shell: bash
run: make test
3 .github/workflows/_release.yml (vendored)
@ -23,6 +23,9 @@ jobs:
# Trusted publishing has to also be configured on PyPI for each package:
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
id-token: write

# This permission is needed by `ncipollo/release-action` to create the GitHub release.
contents: write
defaults:
run:
working-directory: ${{ inputs.working-directory }}
63 .github/workflows/_test.yml (vendored)
@ -7,10 +7,6 @@ on:
|
||||
required: true
|
||||
type: string
|
||||
description: "From which folder this pipeline executes"
|
||||
test_type:
|
||||
type: string
|
||||
description: "Test types to run"
|
||||
default: '["core", "extended", "core-pydantic-2"]'
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.5.1"
|
||||
@ -28,61 +24,22 @@ jobs:
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
test_type: ${{ fromJSON(inputs.test_type) }}
|
||||
name: Python ${{ matrix.python-version }} ${{ matrix.test_type }}
|
||||
name: Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: Set up Python ${{ matrix.python-version }}
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
cache-key: ${{ matrix.test_type }}
|
||||
install-command: |
|
||||
if [ "${{ matrix.test_type }}" == "core" ]; then
|
||||
echo "Running core tests, installing dependencies with poetry..."
|
||||
poetry install
|
||||
elif [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
|
||||
echo "Running core-pydantic-v2 tests, installing dependencies with poetry..."
|
||||
poetry install
|
||||
working-directory: ${{ inputs.working-directory }}
|
||||
cache-key: core
|
||||
|
||||
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
|
||||
# which would prevent caching from working: the cache would get saved
|
||||
# to a different key than where it gets loaded from.
|
||||
poetry run pip install 'pydantic>=2.1,<3'
|
||||
else
|
||||
echo "Running extended tests, installing dependencies with poetry..."
|
||||
poetry install -E extended_testing
|
||||
fi
|
||||
- name: Verify pydantic version
|
||||
run: |
|
||||
if [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
|
||||
EXPECTED_VERSION=2
|
||||
else
|
||||
EXPECTED_VERSION=1
|
||||
fi
|
||||
echo "Checking pydantic version... Expecting ${EXPECTED_VERSION}"
|
||||
|
||||
# Determine the major part of pydantic version
|
||||
VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
|
||||
|
||||
# Check that the major part of pydantic version is as expected, if not
|
||||
# raise an error
|
||||
if [[ "$VERSION" -ne $EXPECTED_VERSION ]]; then
|
||||
echo "Error: pydantic version must be equal to ${EXPECTED_VERSION}; Found: ${VERSION}"
|
||||
exit 1
|
||||
fi
|
||||
echo "Found pydantic version ${VERSION}, as expected"
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
- name: Run ${{matrix.test_type}} tests
|
||||
run: |
|
||||
case "${{ matrix.test_type }}" in
|
||||
core | core-pydantic-2)
|
||||
make test
|
||||
;;
|
||||
*)
|
||||
make extended_tests
|
||||
;;
|
||||
esac
|
||||
run: poetry install
|
||||
|
||||
- name: Run core tests
|
||||
shell: bash
|
||||
run: make test
|
||||
|
59 .github/workflows/langchain_ci.yml (vendored)
@ -8,10 +8,25 @@ on:
|
||||
paths:
|
||||
- '.github/workflows/_lint.yml'
|
||||
- '.github/workflows/_test.yml'
|
||||
- '.github/workflows/_pydantic_compatibility.yml'
|
||||
- '.github/workflows/langchain_ci.yml'
|
||||
- 'libs/langchain/**'
|
||||
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
|
||||
|
||||
# If another push to the same PR or branch happens while this workflow is still running,
|
||||
# cancel the earlier run in favor of the next run.
|
||||
#
|
||||
# There's no point in testing an outdated version of the code. GitHub only allows
|
||||
# a limited number of job runners to be active at the same time, so it's better to cancel
|
||||
# pointless jobs early so that more useful jobs can run sooner.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.5.1"
|
||||
WORKDIR: "libs/langchain"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
uses:
|
||||
@ -19,10 +34,50 @@ jobs:
|
||||
with:
|
||||
working-directory: libs/langchain
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
uses:
|
||||
./.github/workflows/_test.yml
|
||||
with:
|
||||
working-directory: libs/langchain
|
||||
test_type: '["core", "extended", "core-pydantic-2"]'
|
||||
secrets: inherit
|
||||
secrets: inherit
|
||||
|
||||
pydantic-compatibility:
|
||||
uses:
|
||||
./.github/workflows/_pydantic_compatibility.yml
|
||||
with:
|
||||
working-directory: libs/langchain
|
||||
secrets: inherit
|
||||
|
||||
extended-tests:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.WORKDIR }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: Python ${{ matrix.python-version }} extended tests
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: libs/langchain
|
||||
cache-key: extended
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Running extended tests, installing dependencies with poetry..."
|
||||
poetry install -E extended_testing
|
||||
|
||||
- name: Run extended tests
|
||||
run: make extended_tests
|
||||
|
58 .github/workflows/langchain_experimental_ci.yml (vendored)
@ -13,6 +13,20 @@ on:
|
||||
- 'libs/experimental/**'
|
||||
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
|
||||
|
||||
# If another push to the same PR or branch happens while this workflow is still running,
|
||||
# cancel the earlier run in favor of the next run.
|
||||
#
|
||||
# There's no point in testing an outdated version of the code. GitHub only allows
|
||||
# a limited number of job runners to be active at the same time, so it's better to cancel
|
||||
# pointless jobs early so that more useful jobs can run sooner.
|
||||
concurrency:
|
||||
group: ${{ github.workflow }}-${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
env:
|
||||
POETRY_VERSION: "1.5.1"
|
||||
WORKDIR: "libs/experimental"
|
||||
|
||||
jobs:
|
||||
lint:
|
||||
uses:
|
||||
@ -20,10 +34,50 @@ jobs:
|
||||
with:
|
||||
working-directory: libs/experimental
|
||||
secrets: inherit
|
||||
|
||||
test:
|
||||
uses:
|
||||
./.github/workflows/_test.yml
|
||||
with:
|
||||
working-directory: libs/experimental
|
||||
test_type: '["core"]'
|
||||
secrets: inherit
|
||||
secrets: inherit
|
||||
|
||||
# It's possible that langchain-experimental works fine with the latest *published* langchain,
|
||||
# but is broken with the langchain on `master`.
|
||||
#
|
||||
# We want to catch situations like that *before* releasing a new langchain, hence this test.
|
||||
test-with-latest-langchain:
|
||||
runs-on: ubuntu-latest
|
||||
defaults:
|
||||
run:
|
||||
working-directory: ${{ env.WORKDIR }}
|
||||
strategy:
|
||||
matrix:
|
||||
python-version:
|
||||
- "3.8"
|
||||
- "3.9"
|
||||
- "3.10"
|
||||
- "3.11"
|
||||
name: test with unpublished langchain - Python ${{ matrix.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
|
||||
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
|
||||
uses: "./.github/actions/poetry_setup"
|
||||
with:
|
||||
python-version: ${{ matrix.python-version }}
|
||||
poetry-version: ${{ env.POETRY_VERSION }}
|
||||
working-directory: ${{ env.WORKDIR }}
|
||||
cache-key: unpublished-langchain
|
||||
|
||||
- name: Install dependencies
|
||||
shell: bash
|
||||
run: |
|
||||
echo "Running tests with unpublished langchain, installing dependencies with poetry..."
|
||||
poetry install
|
||||
|
||||
echo "Editably installing langchain outside of poetry, to avoid messing up lockfile..."
|
||||
poetry run pip install -e ../langchain
|
||||
|
||||
- name: Run tests
|
||||
run: make test
|
||||
|
15 .github/workflows/scheduled_test.yml (vendored)
@ -25,18 +25,25 @@ jobs:
name: Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3

- name: Set up Python ${{ matrix.python-version }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: libs/langchain
install-command: |
echo "Running scheduled tests, installing dependencies with poetry..."
poetry install --with=test_integration
cache-key: scheduled

- name: Install dependencies
working-directory: libs/langchain
shell: bash
run: |
echo "Running scheduled tests, installing dependencies with poetry..."
poetry install --with=test_integration

- name: Run tests
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
make scheduled_tests
shell: bash
@ -2071,8 +2071,8 @@
"PromptLayer": "https://python.langchain.com/docs/integrations/providers/promptlayer",
"PromptLayer OpenAI": "https://python.langchain.com/docs/integrations/llms/promptlayer_openai"
},
"DeepLake": {
"Deep Lake": "https://python.langchain.com/docs/integrations/providers/deeplake",
"Activeloop DeepLake": {
"Deep Lake": "https://python.langchain.com/docs/integrations/providers/activeloop_deeplake",
"Activeloop's Deep Lake": "https://python.langchain.com/docs/integrations/vectorstores/activeloop_deeplake",
"Analysis of Twitter the-algorithm source code with LangChain, GPT4 and Activeloop's Deep Lake": "https://python.langchain.com/docs/use_cases/question_answering/how_to/code/twitter-the-algorithm-analysis-deeplake",
"Use LangChain, GPT and Activeloop's Deep Lake to work with code base": "https://python.langchain.com/docs/use_cases/question_answering/how_to/code/code-analysis-deeplake",
@ -166,7 +166,7 @@
},
{
"source": "/docs/integrations/deeplake",
"destination": "/docs/integrations/providers/deeplake"
"destination": "/docs/integrations/providers/activeloop_deeplake"
},
{
"source": "/docs/integrations/diffbot",
@ -79,3 +79,7 @@ See OpenLLM's [integration doc](https://github.com/bentoml/OpenLLM#%EF%B8%8F-int
## [Databutton](https://databutton.com/home?new-data-app=true)

These templates serve as examples of how to build, deploy, and share LangChain applications using Databutton. You can create user interfaces with Streamlit, automate tasks by scheduling Python code, and store files and data in the built-in store. Examples include a chatbot interface with conversational memory, a personal search engine, and a starter template for LangChain apps. Deploying and sharing is just one click away.

## [AzureML Online Endpoint](https://github.com/Azure/azureml-examples/blob/main/sdk/python/endpoints/online/llm/langchain/1_langchain_basic_deploy.ipynb)

A minimal example of how to deploy LangChain to an Azure Machine Learning Online Endpoint.
@ -143,12 +143,39 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"id": "c095285d",
|
||||
"cell_type": "markdown",
|
||||
"id": "57e27714",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"## Fine-tuning\n",
|
||||
"\n",
|
||||
"You can call fine-tuned OpenAI models by passing in your corresponding `modelName` parameter.\n",
|
||||
"\n",
|
||||
"This generally takes the form of `ft:{OPENAI_MODEL_NAME}:{ORG_NAME}::{MODEL_ID}`. For example:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "33c4a8b0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, example=False)"
|
||||
]
|
||||
},
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"fine_tuned_model = ChatOpenAI(temperature=0, model_name=\"ft:gpt-3.5-turbo-0613:langchain::7qTVM5AR\")\n",
|
||||
"\n",
|
||||
"fine_tuned_model(messages)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
@ -167,7 +194,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.7"
|
||||
"version": "3.10.5"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
225 docs/extras/integrations/document_loaders/polars_dataframe.ipynb (new file)
@ -0,0 +1,225 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"id": "213a38a2",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# Polars DataFrame\n",
|
||||
"\n",
|
||||
"This notebook goes over how to load data from a [polars](https://pola-rs.github.io/polars-book/user-guide/) DataFrame."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"id": "f6a7a9e4-80d6-486a-b2e3-636c568aa97c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"#!pip install polars"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"id": "79331964",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import polars as pl"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"id": "e487044c",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"df = pl.read_csv(\"example_data/mlb_teams_2012.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"id": "ac273ca1",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<div><style>\n",
|
||||
".dataframe > thead > tr > th,\n",
|
||||
".dataframe > tbody > tr > td {\n",
|
||||
" text-align: right;\n",
|
||||
"}\n",
|
||||
"</style>\n",
|
||||
"<small>shape: (5, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Team</th><th> "Payroll (millions)"</th><th> "Wins"</th></tr><tr><td>str</td><td>f64</td><td>i64</td></tr></thead><tbody><tr><td>"Nationals"</td><td>81.34</td><td>98</td></tr><tr><td>"Reds"</td><td>82.2</td><td>97</td></tr><tr><td>"Yankees"</td><td>197.96</td><td>95</td></tr><tr><td>"Giants"</td><td>117.62</td><td>94</td></tr><tr><td>"Braves"</td><td>83.31</td><td>94</td></tr></tbody></table></div>"
|
||||
],
|
||||
"text/plain": [
|
||||
"shape: (5, 3)\n",
|
||||
"┌───────────┬───────────────────────┬─────────┐\n",
|
||||
"│ Team ┆ \"Payroll (millions)\" ┆ \"Wins\" │\n",
|
||||
"│ --- ┆ --- ┆ --- │\n",
|
||||
"│ str ┆ f64 ┆ i64 │\n",
|
||||
"╞═══════════╪═══════════════════════╪═════════╡\n",
|
||||
"│ Nationals ┆ 81.34 ┆ 98 │\n",
|
||||
"│ Reds ┆ 82.2 ┆ 97 │\n",
|
||||
"│ Yankees ┆ 197.96 ┆ 95 │\n",
|
||||
"│ Giants ┆ 117.62 ┆ 94 │\n",
|
||||
"│ Braves ┆ 83.31 ┆ 94 │\n",
|
||||
"└───────────┴───────────────────────┴─────────┘"
|
||||
]
|
||||
},
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"df.head()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"id": "66e47a13",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.document_loaders import PolarsDataFrameLoader"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"id": "2334caca",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"loader = PolarsDataFrameLoader(df, page_content_column=\"Team\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"id": "d616c2b0",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"[Document(page_content='Nationals', metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}),\n",
|
||||
" Document(page_content='Reds', metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}),\n",
|
||||
" Document(page_content='Yankees', metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}),\n",
|
||||
" Document(page_content='Giants', metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Braves', metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Athletics', metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}),\n",
|
||||
" Document(page_content='Rangers', metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}),\n",
|
||||
" Document(page_content='Orioles', metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}),\n",
|
||||
" Document(page_content='Rays', metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}),\n",
|
||||
" Document(page_content='Angels', metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}),\n",
|
||||
" Document(page_content='Tigers', metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}),\n",
|
||||
" Document(page_content='Cardinals', metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}),\n",
|
||||
" Document(page_content='Dodgers', metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}),\n",
|
||||
" Document(page_content='White Sox', metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}),\n",
|
||||
" Document(page_content='Brewers', metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}),\n",
|
||||
" Document(page_content='Phillies', metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}),\n",
|
||||
" Document(page_content='Diamondbacks', metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}),\n",
|
||||
" Document(page_content='Pirates', metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}),\n",
|
||||
" Document(page_content='Padres', metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}),\n",
|
||||
" Document(page_content='Mariners', metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}),\n",
|
||||
" Document(page_content='Mets', metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}),\n",
|
||||
" Document(page_content='Blue Jays', metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}),\n",
|
||||
" Document(page_content='Royals', metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}),\n",
|
||||
" Document(page_content='Marlins', metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}),\n",
|
||||
" Document(page_content='Red Sox', metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}),\n",
|
||||
" Document(page_content='Indians', metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}),\n",
|
||||
" Document(page_content='Twins', metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}),\n",
|
||||
" Document(page_content='Rockies', metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}),\n",
|
||||
" Document(page_content='Cubs', metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}),\n",
|
||||
" Document(page_content='Astros', metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55})]"
|
||||
]
|
||||
},
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"loader.load()"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "beb55c2f",
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"page_content='Nationals' metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}\n",
|
||||
"page_content='Reds' metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}\n",
|
||||
"page_content='Yankees' metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}\n",
|
||||
"page_content='Giants' metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}\n",
|
||||
"page_content='Braves' metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}\n",
|
||||
"page_content='Athletics' metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}\n",
|
||||
"page_content='Rangers' metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}\n",
|
||||
"page_content='Orioles' metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}\n",
|
||||
"page_content='Rays' metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}\n",
|
||||
"page_content='Angels' metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}\n",
|
||||
"page_content='Tigers' metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}\n",
|
||||
"page_content='Cardinals' metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}\n",
|
||||
"page_content='Dodgers' metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}\n",
|
||||
"page_content='White Sox' metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}\n",
|
||||
"page_content='Brewers' metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}\n",
|
||||
"page_content='Phillies' metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}\n",
|
||||
"page_content='Diamondbacks' metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}\n",
|
||||
"page_content='Pirates' metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}\n",
|
||||
"page_content='Padres' metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}\n",
|
||||
"page_content='Mariners' metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}\n",
|
||||
"page_content='Mets' metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}\n",
|
||||
"page_content='Blue Jays' metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}\n",
|
||||
"page_content='Royals' metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}\n",
|
||||
"page_content='Marlins' metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}\n",
|
||||
"page_content='Red Sox' metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}\n",
|
||||
"page_content='Indians' metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}\n",
|
||||
"page_content='Twins' metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}\n",
|
||||
"page_content='Rockies' metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}\n",
|
||||
"page_content='Cubs' metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}\n",
|
||||
"page_content='Astros' metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55}\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"# Use lazy load for larger table, which won't read the full table into memory\n",
|
||||
"for i in loader.lazy_load():\n",
|
||||
" print(i)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3 (ipykernel)",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.3"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 5
|
||||
}
|
@ -1,4 +1,4 @@
# Deep Lake
# Activeloop Deep Lake
This page covers how to use the Deep Lake ecosystem within LangChain.

## Why Deep Lake?
@ -6,9 +6,15 @@ This page covers how to use the Deep Lake ecosystem within LangChain.
- Not only stores embeddings, but also the original data with automatic version control.
- Truly serverless. Doesn't require another service and can be used with major cloud providers (AWS S3, GCS, etc.)


Activeloop Deep Lake supports SelfQuery Retrieval:
[Activeloop Deep Lake Self Query Retrieval](/docs/extras/modules/data_connection/retrievers/self_query/activeloop_deeplake_self_query)


## More Resources
1. [Ultimate Guide to LangChain & Deep Lake: Build ChatGPT to Answer Questions on Your Financial Data](https://www.activeloop.ai/resources/ultimate-guide-to-lang-chain-deep-lake-build-chat-gpt-to-answer-questions-on-your-financial-data/)
2. [Twitter the-algorithm codebase analysis with Deep Lake](../use_cases/code/twitter-the-algorithm-analysis-deeplake.html)
2. [Twitter the-algorithm codebase analysis with Deep Lake](/docs/use_cases/question_answering/how_to/code/twitter-the-algorithm-analysis-deeplake)
4. [Code Understanding](/docs/modules/data_connection/retrievers/self_query/activeloop_deeplake_self_query)
3. Here is [whitepaper](https://www.deeplake.ai/whitepaper) and [academic paper](https://arxiv.org/pdf/2209.10785.pdf) for Deep Lake
4. Here is a set of additional resources available for review: [Deep Lake](https://github.com/activeloopai/deeplake), [Get started](https://docs.activeloop.ai/getting-started) and [Tutorials](https://docs.activeloop.ai/hub-tutorials)

@ -27,4 +33,4 @@ from langchain.vectorstores import DeepLake
```


For a more detailed walkthrough of the Deep Lake wrapper, see [this notebook](/docs/integrations/vectorstores/deeplake.html)
For a more detailed walkthrough of the Deep Lake wrapper, see [this notebook](/docs/integrations/vectorstores/activeloop_deeplake)
@ -1,27 +1,19 @@
|
||||
# AtlasDB
|
||||
# Atlas
|
||||
|
||||
>[Nomic Atlas](https://docs.nomic.ai/index.html) is a platform for interacting with both
|
||||
> small and internet scale unstructured datasets.
|
||||
|
||||
This page covers how to use Nomic's Atlas ecosystem within LangChain.
|
||||
It is broken into two parts: installation and setup, and then references to specific Atlas wrappers.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
- Install the Python package with `pip install nomic`
|
||||
- Nomic is also included in langchains poetry extras `poetry install -E all`
|
||||
|
||||
## Wrappers
|
||||
|
||||
### VectorStore
|
||||
|
||||
There exists a wrapper around the Atlas neural database, allowing you to use it as a vectorstore.
|
||||
This vectorstore also gives you full access to the underlying AtlasProject object, which will allow you to use the full range of Atlas map interactions, such as bulk tagging and automatic topic modeling.
|
||||
Please see [the Atlas docs](https://docs.nomic.ai/atlas_api.html) for more detailed information.
|
||||
- `Nomic` is also included in LangChain's Poetry extras: `poetry install -E all`
|
||||
|
||||
|
||||
## VectorStore
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/atlas).
|
||||
|
||||
|
||||
To import this vectorstore:
|
||||
```python
|
||||
from langchain.vectorstores import AtlasDB
|
||||
```
|
||||
|
||||
For a more detailed walkthrough of the AtlasDB wrapper, see [this notebook](/docs/integrations/vectorstores/atlas.html)
|
||||
```
|
25 docs/extras/integrations/providers/clickhouse.mdx (new file)
@ -0,0 +1,25 @@
|
||||
# ClickHouse
|
||||
|
||||
> [ClickHouse](https://clickhouse.com/) is the fast and resource efficient open-source database for real-time
|
||||
> apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries.
|
||||
> It has data structures and distance search functions (like `L2Distance`) as well as
|
||||
> [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes)
|
||||
> This enables ClickHouse to be used as a high performance and scalable vector database to store and search vectors with SQL.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `clickhouse-connect` python package.
|
||||
|
||||
```bash
|
||||
pip install clickhouse-connect
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/clickhouse).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Clickhouse, ClickhouseSettings
|
||||
```
|
||||
|
30 docs/extras/integrations/providers/docarray.mdx (new file)
@ -0,0 +1,30 @@
|
||||
# DocArray
|
||||
|
||||
> [DocArray](https://docarray.jina.ai/) is a library for nested, unstructured, multimodal data in transit,
|
||||
> including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process,
|
||||
> embed, search, recommend, store, and transfer multimodal data with a Pythonic API.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `docarray` python package.
|
||||
|
||||
```bash
|
||||
pip install docarray
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
LangChain provides access to the `In-memory` and `HNSW` vector stores from the `DocArray` library.
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/docarray_hnsw).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import DocArrayHnswSearch
|
||||
```
|
||||
See a [usage example](/docs/integrations/vectorstores/docarray_in_memory).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import DocArrayInMemorySearch
|
||||
```
|
||||
|
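A short usage sketch for the in-memory variant. This is illustrative only: the sample texts and the `OpenAIEmbeddings` model are placeholder assumptions, and the linked usage example remains the reference.

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch

# Build a small in-memory vector store from raw texts (assumes OPENAI_API_KEY is set)
store = DocArrayInMemorySearch.from_texts(
    ["DocArray handles nested, multimodal data", "LangChain exposes it as a vector store"],
    OpenAIEmbeddings(),
)

# Retrieve the closest match for a query
docs = store.similarity_search("What kind of data does DocArray handle?", k=1)
print(docs[0].page_content)
```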
32 docs/extras/integrations/providers/facebook_faiss.mdx (new file)
@ -0,0 +1,32 @@
|
||||
# Facebook Faiss
|
||||
|
||||
>[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
|
||||
> is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that
|
||||
> search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting
|
||||
> code for evaluation and parameter tuning.
|
||||
|
||||
[Faiss documentation](https://faiss.ai/).
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `faiss` python package.
|
||||
|
||||
```bash
|
||||
pip install faiss-gpu # For CUDA 7.5+ supported GPU's.
|
||||
```
|
||||
|
||||
OR
|
||||
|
||||
```bash
|
||||
pip install faiss-cpu # For CPU Installation
|
||||
```
|
||||
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/faiss).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import FAISS
|
||||
```
|
@ -0,0 +1,25 @@
|
||||
# Google Vertex AI MatchingEngine
|
||||
|
||||
> [Google Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides
|
||||
> the industry's leading high-scale low latency vector database. These vector databases are commonly
|
||||
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install several python packages.
|
||||
|
||||
```bash
|
||||
pip install tensorflow \
|
||||
google-cloud-aiplatform \
|
||||
tensorflow-hub \
|
||||
tensorflow-text
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/matchingengine).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import MatchingEngine
|
||||
```
|
||||
|
30 docs/extras/integrations/providers/meilisearch.mdx (new file)
@ -0,0 +1,30 @@
|
||||
# Meilisearch
|
||||
|
||||
> [Meilisearch](https://meilisearch.com) is an open-source, lightning-fast, and hyper
|
||||
> relevant search engine.
|
||||
> It comes with great defaults to help developers build snappy search experiences.
|
||||
>
|
||||
> You can [self-host Meilisearch](https://www.meilisearch.com/docs/learn/getting_started/installation#local-installation)
|
||||
> or run on [Meilisearch Cloud](https://www.meilisearch.com/pricing).
|
||||
>
|
||||
>`Meilisearch v1.3` supports vector search.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/meilisearch) for detailed configuration instructions.
|
||||
|
||||
|
||||
We need to install `meilisearch` python package.
|
||||
|
||||
```bash
|
||||
pip install meilisearch
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/meilisearch).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import Meilisearch
|
||||
```
|
||||
|
24 docs/extras/integrations/providers/mongodb_atlas.mdx (new file)
@ -0,0 +1,24 @@
|
||||
# MongoDB Atlas
|
||||
|
||||
>[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud
|
||||
> database available in AWS, Azure, and GCP. It now has support for native
|
||||
> Vector Search on the MongoDB document data.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
See [detail configuration instructions](/docs/integrations/vectorstores/mongodb_atlas).
|
||||
|
||||
We need to install `pymongo` python package.
|
||||
|
||||
```bash
|
||||
pip install pymongo
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/mongodb_atlas).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import MongoDBAtlasVectorSearch
|
||||
```
|
||||
|
24 docs/extras/integrations/providers/pg_embedding.mdx (new file)
@ -0,0 +1,24 @@
|
||||
# Postgres Embedding
|
||||
|
||||
> [pg_embedding](https://github.com/neondatabase/pg_embedding) is an open-source package for
|
||||
> vector similarity search using `Postgres` and the `Hierarchical Navigable Small Worlds`
|
||||
> algorithm for approximate nearest neighbor search.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install several python packages.
|
||||
|
||||
```bash
|
||||
pip install openai
|
||||
pip install psycopg2-binary
|
||||
pip install tiktoken
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/pgembedding).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import PGEmbedding
|
||||
```
|
||||
|
29 docs/extras/integrations/providers/scann.mdx (new file)
@ -0,0 +1,29 @@
|
||||
# ScaNN
|
||||
|
||||
>[Google ScaNN](https://github.com/google-research/google-research/tree/master/scann)
|
||||
> (Scalable Nearest Neighbors) is a python package.
|
||||
>
|
||||
>`ScaNN` is a method for efficient vector similarity search at scale.
|
||||
|
||||
>ScaNN includes search space pruning and quantization for Maximum Inner
|
||||
> Product Search and also supports other distance functions such as
|
||||
> Euclidean distance. The implementation is optimized for x86 processors
|
||||
> with AVX2 support. See its [Google Research github](https://github.com/google-research/google-research/tree/master/scann)
|
||||
> for more details.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `scann` python package.
|
||||
|
||||
```bash
|
||||
pip install scann
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/scann).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import ScaNN
|
||||
```
|
||||
|
26 docs/extras/integrations/providers/supabase.mdx (new file)
@ -0,0 +1,26 @@
|
||||
# Supabase (Postgres)
|
||||
|
||||
>[Supabase](https://supabase.com/docs) is an open source `Firebase` alternative.
|
||||
> `Supabase` is built on top of `PostgreSQL`, which offers strong `SQL`
|
||||
> querying capabilities and enables a simple interface with already-existing tools and frameworks.
|
||||
|
||||
>[PostgreSQL](https://en.wikipedia.org/wiki/PostgreSQL) also known as `Postgres`,
|
||||
> is a free and open-source relational database management system (RDBMS)
|
||||
> emphasizing extensibility and `SQL` compliance.
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `supabase` python package.
|
||||
|
||||
```bash
|
||||
pip install supabase
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/supabase).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import SupabaseVectorStore
|
||||
```
|
||||
|
25 docs/extras/integrations/providers/usearch.mdx (new file)
@ -0,0 +1,25 @@
|
||||
# USearch
|
||||
>[USearch](https://unum-cloud.github.io/usearch/) is a Smaller & Faster Single-File Vector Search Engine.
|
||||
|
||||
>`USearch's` base functionality is identical to `FAISS`, and the interface should look
|
||||
> familiar if you have ever investigated Approximate Nearest Neighbors search.
|
||||
> `USearch` and `FAISS` both employ `HNSW` algorithm, but they differ significantly
|
||||
> in their design principles. `USearch` is compact and broadly compatible with FAISS without
|
||||
> sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies.
|
||||
>
|
||||
## Installation and Setup
|
||||
|
||||
We need to install `usearch` python package.
|
||||
|
||||
```bash
|
||||
pip install usearch
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/usearch).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import USearch
|
||||
```
|
||||
|
28 docs/extras/integrations/providers/xata.mdx (new file)
@ -0,0 +1,28 @@
|
||||
# Xata
|
||||
|
||||
> [Xata](https://xata.io) is a serverless data platform, based on `PostgreSQL`.
|
||||
> It provides a Python SDK for interacting with your database, and a UI
|
||||
> for managing your data.
|
||||
> `Xata` has a native vector type, which can be added to any table, and
|
||||
> supports similarity search. LangChain inserts vectors directly to `Xata`,
|
||||
> and queries it for the nearest neighbors of a given vector, so that you can
|
||||
> use all the LangChain Embeddings integrations with `Xata`.
|
||||
|
||||
|
||||
## Installation and Setup
|
||||
|
||||
|
||||
We need to install `xata` python package.
|
||||
|
||||
```bash
|
||||
pip install xata==1.0.0a7
|
||||
```
|
||||
|
||||
## Vector Store
|
||||
|
||||
See a [usage example](/docs/integrations/vectorstores/xata).
|
||||
|
||||
```python
|
||||
from langchain.vectorstores import XataVectorStore
|
||||
```
|
||||
|
@ -100,8 +100,12 @@
|
||||
"source": [
|
||||
"## Configure and use the Enterprise Search retriever\n",
|
||||
"\n",
|
||||
"The Enterprise Search retriever is implemented in the `langchain.retriever.GoogleCloudEntepriseSearchRetriever` class. The `get_relevan_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated with either an `extractive segment` or an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of a document from which the segments or answers were extracted.\n",
|
||||
"The Enterprise Search retriever is implemented in the `langchain.retriever.GoogleCloudEntepriseSearchRetriever` class. The `get_relevant_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated the document content.\n",
|
||||
"Depending on the data type used in Enterprise search (structured or unstructured) the `page_content` field is populated as follows:\n",
|
||||
"- Structured data source: either an `extractive segment` or an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of the document from which the segments or answers were extracted.\n",
|
||||
"- Unstructured data source: a string json containing all the fields returned from the structured data source. The `metadata` field is populated with metadata (if any) of the document \n",
|
||||
"\n",
|
||||
"### Only for Unstructured data sources:\n",
|
||||
"An extractive answer is verbatim text that is returned with each search result. It is extracted directly from the original document. Extractive answers are typically displayed near the top of web pages to provide an end user with a brief answer that is contextually relevant to their query. Extractive answers are available for website and unstructured search.\n",
|
||||
"\n",
|
||||
"An extractive segment is verbatim text that is returned with each search result. An extractive segment is usually more verbose than an extractive answer. Extractive segments can be displayed as an answer to a query, and can be used to perform post-processing tasks and as input for large language models to generate answers or new text. Extractive segments are available for unstructured search.\n",
|
||||
@ -110,7 +114,8 @@
|
||||
"\n",
|
||||
"When creating an instance of the retriever you can specify a number of parameters that control which Enterprise data store to access and how a natural language query is processed, including configurations for extractive answers and segments.\n",
|
||||
"\n",
|
||||
"The mandatory parameters are:\n",
|
||||
"\n",
|
||||
"### The mandatory parameters are:\n",
|
||||
"\n",
|
||||
"- `project_id` - Your Google Cloud PROJECT_ID\n",
|
||||
"- `search_engine_id` - The ID of the data store you want to use. \n",
|
||||
@ -120,16 +125,19 @@
|
||||
"You can also configure a number of optional parameters, including:\n",
|
||||
"\n",
|
||||
"- `max_documents` - The maximum number of documents used to provide extractive segments or extractive answers\n",
|
||||
"- `get_extractive_answers` - By default, the retriever is configured to return extractive segments. Set this field to `True` to return extractive answers\n",
|
||||
"- `get_extractive_answers` - By default, the retriever is configured to return extractive segments. Set this field to `True` to return extractive answers. This is used only when `engine_data_type` set to 0 (unstructured) \n",
|
||||
"- `max_extractive_answer_count` - The maximum number of extractive answers returned in each search result.\n",
|
||||
" At most 5 answers will be returned\n",
|
||||
" At most 5 answers will be returned. This is used only when `engine_data_type` set to 0 (unstructured) \n",
|
||||
"- `max_extractive_segment_count` - The maximum number of extractive segments returned in each search result.\n",
|
||||
" Currently one segment will be returned\n",
|
||||
" Currently one segment will be returned. This is used only when `engine_data_type` set to 0 (unstructured) \n",
|
||||
"- `filter` - The filter expression that allows you filter the search results based on the metadata associated with the documents in the searched data store. \n",
|
||||
"- `query_expansion_condition` - Specification to determine under which conditions query expansion should occur.\n",
|
||||
" 0 - Unspecified query expansion condition. In this case, server behavior defaults to disabled.\n",
|
||||
" 1 - Disabled query expansion. Only the exact search query is used, even if SearchResponse.total_size is zero.\n",
|
||||
" 2 - Automatic query expansion built by the Search API.\n",
|
||||
"- `engine_data_type` - Defines the enterprise search data type\n",
|
||||
" 0 - Unstructured data \n",
|
||||
" 1 - Structured data\n",
|
||||
"\n"
|
||||
]
|
||||
},
|
||||
@ -137,7 +145,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever with extractve segments"
|
||||
"### Configure and use the retriever for **unstructured** data with extractve segments "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -182,7 +190,7 @@
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever with extractve answers "
|
||||
"### Configure and use the retriever for **unstructured** data with extractve answers "
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -213,12 +221,30 @@
|
||||
" print(doc)"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Configure and use the retriever for **structured** data with extractve answers "
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": []
|
||||
"source": [
|
||||
"retriever = GoogleCloudEnterpriseSearchRetriever(\n",
|
||||
" project_id=PROJECT_ID,\n",
|
||||
" search_engine_id=SEARCH_ENGINE_ID,\n",
|
||||
" max_documents=3,\n",
|
||||
" engine_data_type=1\n",
|
||||
")\n",
|
||||
"\n",
|
||||
"result = retriever.get_relevant_documents(query)\n",
|
||||
"for doc in result:\n",
|
||||
" print(doc)"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
|
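A compact sketch that pulls together the retriever parameters described in the notebook above. The parameter names come from that list; the project and data store IDs and the query are placeholders, and the import path should be checked against the installed langchain version.

```python
from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever

retriever = GoogleCloudEnterpriseSearchRetriever(
    project_id="my-gcp-project",          # mandatory: your Google Cloud PROJECT_ID (placeholder)
    search_engine_id="my-data-store-id",  # mandatory: ID of the data store to query (placeholder)
    max_documents=3,                      # optional: cap on documents used for segments/answers
    engine_data_type=0,                   # 0 = unstructured data, 1 = structured data
    get_extractive_answers=True,          # unstructured only: return extractive answers instead of segments
    max_extractive_answer_count=3,        # unstructured only: at most 5 answers per search result
)

docs = retriever.get_relevant_documents("example query")  # placeholder query
for doc in docs:
    print(doc.page_content, doc.metadata)
```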
461 docs/extras/integrations/toolkits/ainetwork.ipynb (new file)
@ -0,0 +1,461 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# AINetwork Toolkit\n",
|
||||
"\n",
|
||||
"The AINetwork Toolkit is a set of tools for interacting with the AINetwork Blockchain. These tools allow you to transfer AIN, read and write values, create apps, and set permissions for specific paths within the blockchain database."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Installing dependencies\n",
|
||||
"\n",
|
||||
"Before using the AINetwork Toolkit, you need to install the ain-py package. You can install it with pip:\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": null,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"!pip install ain-py"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Set environmental variables\n",
|
||||
"\n",
|
||||
"You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environmental variable to your AIN Blockchain Account Private Key."
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"] = \"\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get AIN Blockchain private key"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
|
||||
"private_key: f5e2f359bb6b7836a2ac70815473d1a290c517f847d096f5effe818de8c2cf14\n",
|
||||
"\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"import os\n",
|
||||
"\n",
|
||||
"from ain.account import Account\n",
|
||||
"\n",
|
||||
"if os.environ.get(\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\", None):\n",
|
||||
" account = Account(os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"])\n",
|
||||
"else:\n",
|
||||
" account = Account.create()\n",
|
||||
" os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"] = account.private_key\n",
|
||||
" print(\n",
|
||||
" f\"\"\"\n",
|
||||
"address: {account.address}\n",
|
||||
"private_key: {account.private_key}\n",
|
||||
"\"\"\"\n",
|
||||
" )\n",
|
||||
"# IMPORTANT: If you plan to use this account in the future, make sure to save the\n",
|
||||
"# private key in a secure place. Losing access to your private key means losing\n",
|
||||
"# access to your account."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize the AINetwork Toolkit\n",
|
||||
"\n",
|
||||
"You can initialize the AINetwork Toolkit like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 4,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit\n",
|
||||
"\n",
|
||||
"toolkit = AINetworkToolkit()\n",
|
||||
"tools = toolkit.get_tools()\n",
|
||||
"address = tools[0].interface.wallet.defaultAccount.address"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Initialize the Agent with the AINetwork Toolkit\n",
|
||||
"\n",
|
||||
"You can initialize the agent with the AINetwork Toolkit like this:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 5,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"from langchain.chat_models import ChatOpenAI\n",
|
||||
"from langchain.agents import initialize_agent, AgentType\n",
|
||||
"\n",
|
||||
"llm = ChatOpenAI(temperature=0)\n",
|
||||
"agent = initialize_agent(\n",
|
||||
" tools=tools,\n",
|
||||
" llm=llm,\n",
|
||||
" verbose=True,\n",
|
||||
" agent=AgentType.OPENAI_FUNCTIONS,\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"## Example Usage\n",
|
||||
"\n",
|
||||
"Here are some examples of how you can use the agent with the AINetwork Toolkit:"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Define App name to test"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 6,
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": [
|
||||
"appName = f\"langchain_demo_{address.lower()}\""
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Create an app in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 7,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINappOps` with `{'type': 'SET_ADMIN', 'appName': 'langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{\"tx_hash\": \"0x018846d6a9fc111edb1a2246ae2484ef05573bd2c584f3d0da155fa4b4936a9e\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 4002, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 2}}, \"state\": {\"service\": 1640}}, \"gas_cost_total\": 0, \"func_results\": {\"_createApp\": {\"op_results\": {\"0\": {\"path\": \"/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"1\": {\"path\": \"/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"2\": {\"path\": \"/manage_app/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/config/admin\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}}, \"code\": 0, \"bandwidth_gas_amount\": 2000}}, \"code\": 0, \"bandwidth_gas_amount\": 2001, \"gas_amount_charged\": 5642}}\u001b[0m\u001b[32;1m\u001b[1;3mThe app with the name \"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\" has been created in the AINetwork Blockchain database.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The app with the name \"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\" has been created in the AINetwork Blockchain database.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(\n",
|
||||
" f\"Create an app in the AINetwork Blockchain database with the name {appName}\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set a value at a given path in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINvalueOps` with `{'type': 'SET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object', 'value': {'1': 2, '34': 56}}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{\"tx_hash\": \"0x3d1a16d9808830088cdf4d37f90f4b1fa1242e2d5f6f983829064f45107b5279\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 1}}, \"state\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 674}}}, \"gas_cost_total\": 0, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 0}}\u001b[0m\u001b[32;1m\u001b[1;3mThe value {1: 2, '34': 56} has been set at the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The value {1: 2, '34': 56} has been set at the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(f\"Set the value {{1: 2, '34': 56}} at the path /apps/{appName}/object .\")\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Set permissions for a path in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 9,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINruleOps` with `{'type': 'SET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from', 'eval': 'auth.addr===$from'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[38;5;200m\u001b[1;3m{\"tx_hash\": \"0x37d5264e580f6a217a347059a735bfa9eb5aad85ff28a95531c6dc09252664d2\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 1}}, \"state\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 712}}}, \"gas_cost_total\": 0, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 0}}\u001b[0m\u001b[32;1m\u001b[1;3mThe write permissions for the path `/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from` have been set with the eval string `auth.addr===$from`.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The write permissions for the path `/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from` have been set with the eval string `auth.addr===$from`.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(\n",
|
||||
" f\"Set the write permissions for the path /apps/{appName}/user/$from with the\"\n",
|
||||
" \" eval string auth.addr===$from .\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Retrieve the permissions for a path in the AINetwork Blockchain database"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 10,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINownerOps` with `{'type': 'GET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m{\".owner\": {\"owners\": {\"0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\": {\"branch_owner\": true, \"write_function\": true, \"write_owner\": true, \"write_rule\": true}}}}\u001b[0m\u001b[32;1m\u001b[1;3mThe permissions for the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac are as follows:\n",
|
||||
"\n",
|
||||
"- Address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
|
||||
" - branch_owner: true\n",
|
||||
" - write_function: true\n",
|
||||
" - write_owner: true\n",
|
||||
" - write_rule: true\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The permissions for the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac are as follows:\n",
|
||||
"\n",
|
||||
"- Address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
|
||||
" - branch_owner: true\n",
|
||||
" - write_function: true\n",
|
||||
" - write_owner: true\n",
|
||||
" - write_rule: true\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(agent.run(f\"Retrieve the permissions for the path /apps/{appName}.\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get AIN from faucet"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 11,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"{\"result\":\"0x0eb07b67b7d0a702cb60e865d3deafff3070d8508077ef793d69d6819fd92ea3\",\"time\":1692348112376}"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"!curl http://faucet.ainetwork.ai/api/test/{address}/"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Get AIN Balance"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 12,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINvalueOps` with `{'type': 'GET', 'path': '/accounts/0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac/balance'}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[33;1m\u001b[1;3m100\u001b[0m\u001b[32;1m\u001b[1;3mThe AIN balance of address 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac is 100 AIN.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The AIN balance of address 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac is 100 AIN.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(agent.run(f\"Check AIN balance of {address}\"))"
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"### Transfer AIN"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 13,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
|
||||
"\u001b[32;1m\u001b[1;3m\n",
|
||||
"Invoking: `AINtransfer` with `{'address': '0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b', 'amount': 100}`\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\u001b[0m\u001b[36;1m\u001b[1;3m{\"tx_hash\": \"0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 3}, \"state\": {\"service\": 866}}, \"gas_cost_total\": 0, \"func_results\": {\"_transfer\": {\"op_results\": {\"0\": {\"path\": \"/accounts/0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac/balance\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"1\": {\"path\": \"/accounts/0x19937B227b1b13f29e7AB18676a89EA3BDEA9C5b/balance\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}}, \"code\": 0, \"bandwidth_gas_amount\": 0}}, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 869}}\u001b[0m\u001b[32;1m\u001b[1;3mThe transfer of 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b was successful. The transaction hash is 0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e.\u001b[0m\n",
|
||||
"\n",
|
||||
"\u001b[1m> Finished chain.\u001b[0m\n",
|
||||
"The transfer of 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b was successful. The transaction hash is 0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e.\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"print(\n",
|
||||
" agent.run(\n",
|
||||
" \"Transfer 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b\"\n",
|
||||
" )\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.12"
|
||||
},
|
||||
"orig_nbformat": 4
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
@ -5,7 +5,7 @@
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# ClickHouse Vector Search\n",
|
||||
"# ClickHouse\n",
|
||||
"\n",
|
||||
"> [ClickHouse](https://clickhouse.com/) is the fastest and most resource efficient open-source database for real-time apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries. Lately added data structures and distance search functions (like `L2Distance`) as well as [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes) enable ClickHouse to be used as a high performance and scalable vector database to store and search vectors with SQL.\n",
|
||||
"\n",
|
||||
@ -198,8 +198,7 @@
|
||||
"ExecuteTime": {
|
||||
"end_time": "2023-06-03T08:28:58.252991Z",
|
||||
"start_time": "2023-06-03T08:28:58.197560Z"
|
||||
},
|
||||
"scrolled": false
|
||||
}
|
||||
},
|
||||
"outputs": [
|
||||
{
|
||||
@ -246,9 +245,7 @@
|
||||
"cell_type": "code",
|
||||
"execution_count": 8,
|
||||
"id": "54f4f561",
|
||||
"metadata": {
|
||||
"scrolled": false
|
||||
},
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
@ -395,7 +392,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.2"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,20 +1,18 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "2ce41f46-5711-4311-b04d-2fe233ac5b1b",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DocArrayHnswSearch\n",
|
||||
"# DocArray HnswSearch\n",
|
||||
"\n",
|
||||
">[DocArrayHnswSearch](https://docs.docarray.org/user_guide/storing/index_hnswlib/) is a lightweight Document Index implementation provided by [Docarray](https://docs.docarray.org/) that runs fully locally and is best suited for small- to medium-sized datasets. It stores vectors on disk in [hnswlib](https://github.com/nmslib/hnswlib), and stores all other data in [SQLite](https://www.sqlite.org/index.html).\n",
|
||||
">[DocArrayHnswSearch](https://docs.docarray.org/user_guide/storing/index_hnswlib/) is a lightweight Document Index implementation provided by [Docarray](https://github.com/docarray/docarray) that runs fully locally and is best suited for small- to medium-sized datasets. It stores vectors on disk in [hnswlib](https://github.com/nmslib/hnswlib), and stores all other data in [SQLite](https://www.sqlite.org/index.html).\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `DocArrayHnswSearch`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7ee37d28",
|
||||
"metadata": {},
|
||||
@ -57,7 +55,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "8dbb6de2",
|
||||
"metadata": {
|
||||
@ -103,7 +100,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ed6f905b-4853-4a44-9730-614aa8e22b78",
|
||||
"metadata": {},
|
||||
@ -151,7 +147,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "3febb987-e903-416f-af26-6897d84c8d61",
|
||||
"metadata": {},
|
||||
@ -160,7 +155,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "bb1df11a",
|
||||
"metadata": {},
|
||||
@ -236,7 +230,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,20 +1,18 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "a3afefb0-7e99-4912-a222-c6b186da11af",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# DocArrayInMemorySearch\n",
|
||||
"# DocArray InMemorySearch\n",
|
||||
"\n",
|
||||
">[DocArrayInMemorySearch](https://docs.docarray.org/user_guide/storing/index_in_memory/) is a document index provided by [Docarray](https://docs.docarray.org/) that stores documents in memory. It is a great starting point for small datasets, where you may not want to launch a database server.\n",
|
||||
">[DocArrayInMemorySearch](https://docs.docarray.org/user_guide/storing/index_in_memory/) is a document index provided by [Docarray](https://github.com/docarray/docarray) that stores documents in memory. It is a great starting point for small datasets, where you may not want to launch a database server.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the `DocArrayInMemorySearch`."
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "5031a3ec",
|
||||
"metadata": {},
|
||||
@ -56,7 +54,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "6e57a389-f637-4b8f-9ab2-759ae7485f78",
|
||||
"metadata": {},
|
||||
@ -98,7 +95,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "efbb6684-3846-4332-a624-ddd4d75844c1",
|
||||
"metadata": {},
|
||||
@ -146,7 +142,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "43896697-f99e-47b6-9117-47a25e9afa9c",
|
||||
"metadata": {},
|
||||
@ -155,7 +150,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "414a9bc9",
|
||||
"metadata": {},
|
||||
@ -224,7 +218,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -5,7 +5,7 @@
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# FAISS\n",
|
||||
"# Faiss\n",
|
||||
"\n",
|
||||
">[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
|
||||
"\n",
|
||||
@ -596,7 +596,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.17"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -5,9 +5,9 @@
|
||||
"id": "655b8f55-2089-4733-8b09-35dea9580695",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MatchingEngine\n",
|
||||
"# Google Vertex AI MatchingEngine\n",
|
||||
"\n",
|
||||
"This notebook shows how to use functionality related to the GCP Vertex AI `MatchingEngine` vector database.\n",
|
||||
"This notebook shows how to use functionality related to the `GCP Vertex AI MatchingEngine` vector database.\n",
|
||||
"\n",
|
||||
"> Vertex AI [Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides the industry's leading high-scale low latency vector database. These vector databases are commonly referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.\n",
|
||||
"\n",
|
||||
@ -348,7 +348,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -197,7 +197,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -205,7 +204,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -229,7 +227,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
@ -298,9 +295,9 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.11.4"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
"nbformat_minor": 4
|
||||
}
|
||||
|
@ -1,14 +1,13 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# MongoDB Atlas\n",
|
||||
"\n",
|
||||
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS , Azure, and GCP. It now has support for native Vector Search on your MongoDB document data.\n",
|
||||
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS, Azure, and GCP. It now has support for native Vector Search on your MongoDB document data.\n",
|
||||
"\n",
|
||||
"This notebook shows how to use `MongoDB Atlas Vector Search` to store your embeddings in MongoDB documents, create a vector search index, and perform KNN search with an approximate nearest neighbor algorithm.\n",
|
||||
"\n",
|
||||
@ -44,7 +43,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "457ace44-1d95-4001-9dd5-78811ab208ad",
|
||||
"metadata": {},
|
||||
@ -63,7 +61,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1f3ecc42",
|
||||
"metadata": {},
|
||||
@ -147,7 +144,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "851a2ec9-9390-49a4-8412-3e132c9f789d",
|
||||
"metadata": {},
|
||||
@ -191,7 +187,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,18 +1,17 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "1292f057",
|
||||
"metadata": {},
|
||||
"source": [
|
||||
"# pg_embedding\n",
|
||||
"# Postgres Embedding\n",
|
||||
"\n",
|
||||
"> [pg_embedding](https://github.com/neondatabase/pg_embedding) is an open-source vector similarity search for `Postgres` that uses Hierarchical Navigable Small Worlds for approximate nearest neighbor search.\n",
|
||||
"> [Postgres Embedding](https://github.com/neondatabase/pg_embedding) is an open-source vector similarity search for `Postgres` that uses `Hierarchical Navigable Small Worlds (HNSW)` for approximate nearest neighbor search.\n",
|
||||
"\n",
|
||||
"It supports:\n",
|
||||
"- exact and approximate nearest neighbor search using HNSW\n",
|
||||
"- L2 distance\n",
|
||||
">It supports:\n",
|
||||
">- exact and approximate nearest neighbor search using HNSW\n",
|
||||
">- L2 distance\n",
|
||||
"\n",
|
||||
"This notebook shows how to use the Postgres vector database (`PGEmbedding`).\n",
|
||||
"\n",
|
||||
@ -36,7 +35,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "b2e49694",
|
||||
"metadata": {},
|
||||
@ -158,7 +156,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7ef7b052",
|
||||
"metadata": {},
|
||||
@ -167,7 +164,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "939151f7",
|
||||
"metadata": {},
|
||||
@ -192,7 +188,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "f9510e6b",
|
||||
"metadata": {},
|
||||
@ -214,7 +209,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "7adacf29",
|
||||
"metadata": {},
|
||||
@ -236,7 +230,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "528893fb",
|
||||
"metadata": {},
|
||||
@ -330,7 +323,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -182,7 +182,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.11"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -1,7 +1,6 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "683953b3",
|
||||
"metadata": {},
|
||||
@ -10,7 +9,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "cc80fa84-1f2f-48b4-bd39-3e6412f012f1",
|
||||
"metadata": {},
|
||||
@ -87,7 +85,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "69bff365-3039-4ff8-a641-aa190166179d",
|
||||
"metadata": {},
|
||||
@ -237,7 +234,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "18152965",
|
||||
"metadata": {},
|
||||
@ -246,7 +242,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "ea13e80a",
|
||||
"metadata": {},
|
||||
@ -287,7 +282,6 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"attachments": {},
|
||||
"cell_type": "markdown",
|
||||
"id": "794a7552",
|
||||
"metadata": {},
|
||||
@ -439,7 +433,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.6"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -8,7 +8,7 @@
|
||||
"# USearch\n",
|
||||
">[USearch](https://unum-cloud.github.io/usearch/) is a Smaller & Faster Single-File Vector Search Engine\n",
|
||||
"\n",
|
||||
"USearch's base functionality is identical to FAISS, and the interface should look familiar if you have ever investigated Approximate Nearest Neigbors search. FAISS is a widely recognized standard for high-performance vector search engines. USearch and FAISS both employ the same HNSW algorithm, but they differ significantly in their design principles. USearch is compact and broadly compatible without sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies."
|
||||
">USearch's base functionality is identical to FAISS, and the interface should look familiar if you have ever investigated Approximate Nearest Neigbors search. FAISS is a widely recognized standard for high-performance vector search engines. USearch and FAISS both employ the same HNSW algorithm, but they differ significantly in their design principles. USearch is compact and broadly compatible without sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies."
|
||||
]
|
||||
},
|
||||
{
|
||||
@ -187,7 +187,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.9.1"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -232,7 +232,7 @@
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.10.9"
|
||||
"version": "3.10.12"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
|
@ -6,7 +6,6 @@ from __future__ import annotations
|
||||
import json
|
||||
from typing import Any, ClassVar, Dict, List, Optional, Type
|
||||
|
||||
import pydantic
|
||||
from langchain.base_language import BaseLanguageModel
|
||||
from langchain.callbacks.manager import CallbackManagerForChainRun
|
||||
from langchain.chains.base import Chain
|
||||
@ -14,6 +13,7 @@ from langchain.chains.llm import LLMChain
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
from langchain_experimental import pydantic_v1 as pydantic
|
||||
from langchain_experimental.cpal.constants import Constant
|
||||
from langchain_experimental.cpal.models import (
|
||||
CausalModel,
|
||||
|
@ -1,3 +1,4 @@
|
||||
import typing
|
||||
from importlib import metadata
|
||||
|
||||
## Create namespaces for pydantic v1 and v2.
|
||||
@ -11,11 +12,19 @@ from importlib import metadata
|
||||
# unambiguously uses either v1 or v2 API.
|
||||
# * This change is easier to roll out and roll back.
|
||||
|
||||
try:
|
||||
from pydantic.v1 import * # noqa: F403
|
||||
except ImportError:
|
||||
# It's currently impossible to support mypy for both pydantic v1 and v2 at once:
|
||||
# https://github.com/pydantic/pydantic/issues/6022
|
||||
#
|
||||
# In the lint environment, pydantic is currently v1.
|
||||
# When we upgrade it to pydantic v2, we'll need
|
||||
# to replace this with `from pydantic.v1 import *`.
|
||||
if typing.TYPE_CHECKING:
|
||||
from pydantic import * # noqa: F403
|
||||
|
||||
else:
|
||||
try:
|
||||
from pydantic.v1 import * # noqa: F403
|
||||
except ImportError:
|
||||
from pydantic import * # noqa: F403
|
||||
|
||||
try:
|
||||
_PYDANTIC_MAJOR_VERSION: int = int(metadata.version("pydantic").split(".")[0])
|
||||
|
@ -1,4 +1,15 @@
|
||||
try:
|
||||
from pydantic.v1.dataclasses import * # noqa: F403
|
||||
except ImportError:
|
||||
import typing
|
||||
|
||||
# It's currently impossible to support mypy for both pydantic v1 and v2 at once:
|
||||
# https://github.com/pydantic/pydantic/issues/6022
|
||||
#
|
||||
# In the lint environment, pydantic is currently v1.
|
||||
# When we upgrade it to pydantic v2, we'll need to
|
||||
# replace this with `from pydantic.v1.dataclasses import *`.
|
||||
if typing.TYPE_CHECKING:
|
||||
from pydantic.dataclasses import * # noqa: F403
|
||||
else:
|
||||
try:
|
||||
from pydantic.v1.dataclasses import * # noqa: F403
|
||||
except ImportError:
|
||||
from pydantic.dataclasses import * # noqa: F403
|
||||
|
@ -1,4 +1,15 @@
|
||||
try:
|
||||
from pydantic.v1.main import * # noqa: F403
|
||||
except ImportError:
|
||||
import typing
|
||||
|
||||
# It's currently impossible to support mypy for both pydantic v1 and v2 at once:
|
||||
# https://github.com/pydantic/pydantic/issues/6022
|
||||
#
|
||||
# In the lint environment, pydantic is currently v1.
|
||||
# When we upgrade it to pydantic v2, we'll need
|
||||
# to replace this with `from pydantic.v1.main import *`.
|
||||
if typing.TYPE_CHECKING:
|
||||
from pydantic.main import * # noqa: F403
|
||||
else:
|
||||
try:
|
||||
from pydantic.v1.main import * # noqa: F403
|
||||
except ImportError:
|
||||
from pydantic.main import * # noqa: F403
|
||||
|
826
libs/experimental/poetry.lock
generated
File diff suppressed because it is too large
@ -5,12 +5,12 @@ import unittest
|
||||
from typing import Type
|
||||
from unittest import mock
|
||||
|
||||
import pydantic
|
||||
import pytest
|
||||
from langchain import OpenAI
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
from langchain.prompts.prompt import PromptTemplate
|
||||
|
||||
from langchain_experimental import pydantic_v1 as pydantic
|
||||
from langchain_experimental.cpal.base import (
|
||||
CausalChain,
|
||||
CPALChain,
|
||||
|
@ -1,4 +1,5 @@
|
||||
"""Agent toolkits."""
|
||||
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
|
||||
from langchain.agents.agent_toolkits.amadeus.toolkit import AmadeusToolkit
|
||||
from langchain.agents.agent_toolkits.azure_cognitive_services import (
|
||||
AzureCognitiveServicesToolkit,
|
||||
@ -46,6 +47,7 @@ from langchain.agents.agent_toolkits.xorbits.base import create_xorbits_agent
|
||||
from langchain.agents.agent_toolkits.zapier.toolkit import ZapierToolkit
|
||||
|
||||
__all__ = [
|
||||
"AINetworkToolkit",
|
||||
"AmadeusToolkit",
|
||||
"AzureCognitiveServicesToolkit",
|
||||
"FileManagementToolkit",
|
||||
|
@ -0,0 +1 @@
|
||||
"""AINetwork toolkit."""
|
@ -0,0 +1,45 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import TYPE_CHECKING, List, Literal, Optional
|
||||
|
||||
from langchain.agents.agent_toolkits.base import BaseToolkit
|
||||
from langchain.pydantic_v1 import root_validator
|
||||
from langchain.tools import BaseTool
|
||||
from langchain.tools.ainetwork.app import AINAppOps
|
||||
from langchain.tools.ainetwork.owner import AINOwnerOps
|
||||
from langchain.tools.ainetwork.rule import AINRuleOps
|
||||
from langchain.tools.ainetwork.transfer import AINTransfer
|
||||
from langchain.tools.ainetwork.utils import authenticate
|
||||
from langchain.tools.ainetwork.value import AINValueOps
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ain.ain import Ain
|
||||
|
||||
|
||||
class AINetworkToolkit(BaseToolkit):
|
||||
"""Toolkit for interacting with AINetwork Blockchain."""
|
||||
|
||||
network: Optional[Literal["mainnet", "testnet"]] = "testnet"
|
||||
interface: Optional[Ain] = None
|
||||
|
||||
@root_validator(pre=True)
|
||||
def set_interface(cls, values: dict) -> dict:
|
||||
if not values.get("interface"):
|
||||
values["interface"] = authenticate(network=values.get("network", "testnet"))
|
||||
return values
|
||||
|
||||
class Config:
|
||||
"""Pydantic config."""
|
||||
|
||||
validate_all = True
|
||||
arbitrary_types_allowed = True
|
||||
|
||||
def get_tools(self) -> List[BaseTool]:
|
||||
"""Get the tools in the toolkit."""
|
||||
return [
|
||||
AINAppOps(),
|
||||
AINOwnerOps(),
|
||||
AINRuleOps(),
|
||||
AINTransfer(),
|
||||
AINValueOps(),
|
||||
]
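A minimal usage sketch for the toolkit defined above. It assumes the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environment variable is set (as in the notebook added by this change); `network="testnet"` simply makes the declared default explicit.

```python
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit

# The root validator authenticates against the chosen network and builds the
# Ain interface when none is supplied explicitly.
toolkit = AINetworkToolkit(network="testnet")
tools = toolkit.get_tools()  # AINAppOps, AINOwnerOps, AINRuleOps, AINTransfer, AINValueOps
```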
|
@ -176,22 +176,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC):
|
||||
dumpd(self), [messages], invocation_params=params, options=options
|
||||
)
|
||||
try:
|
||||
message: Optional[BaseMessageChunk] = None
|
||||
generation: Optional[ChatGenerationChunk] = None
|
||||
for chunk in self._stream(
|
||||
messages, stop=stop, run_manager=run_manager, **kwargs
|
||||
):
|
||||
yield chunk.message
|
||||
if message is None:
|
||||
message = chunk.message
|
||||
if generation is None:
|
||||
generation = chunk
|
||||
else:
|
||||
message += chunk.message
|
||||
assert message is not None
|
||||
generation += chunk
|
||||
assert generation is not None
|
||||
except (KeyboardInterrupt, Exception) as e:
|
||||
run_manager.on_llm_error(e)
|
||||
raise e
|
||||
else:
|
||||
run_manager.on_llm_end(
|
||||
LLMResult(generations=[[ChatGeneration(message=message)]]),
|
||||
LLMResult(generations=[[generation]]),
|
||||
)
|
||||
|
||||
async def astream(
|
||||
@ -223,22 +223,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC):
|
||||
dumpd(self), [messages], invocation_params=params, options=options
|
||||
)
|
||||
try:
|
||||
message: Optional[BaseMessageChunk] = None
|
||||
generation: Optional[ChatGenerationChunk] = None
|
||||
async for chunk in self._astream(
|
||||
messages, stop=stop, run_manager=run_manager, **kwargs
|
||||
):
|
||||
yield chunk.message
|
||||
if message is None:
|
||||
message = chunk.message
|
||||
if generation is None:
|
||||
generation = chunk
|
||||
else:
|
||||
message += chunk.message
|
||||
assert message is not None
|
||||
generation += chunk
|
||||
assert generation is not None
|
||||
except (KeyboardInterrupt, Exception) as e:
|
||||
await run_manager.on_llm_error(e)
|
||||
raise e
|
||||
else:
|
||||
await run_manager.on_llm_end(
|
||||
LLMResult(generations=[[ChatGeneration(message=message)]]),
|
||||
LLMResult(generations=[[generation]]),
|
||||
)
|
||||
|
||||
# --- Custom methods ---
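For callers, the visible behaviour of `stream`/`astream` is unchanged: message chunks are still yielded as they arrive. A rough sketch, assuming `ChatOpenAI` with a configured OpenAI key:

```python
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

chat = ChatOpenAI(temperature=0)  # assumes OPENAI_API_KEY is set

# Chunks are yielded as before; internally the callback manager's on_llm_end
# now receives the aggregated ChatGenerationChunk instead of a ChatGeneration
# rebuilt from the message alone, so generation_info is preserved.
for chunk in chat.stream([HumanMessage(content="Write a haiku about autumn")]):
    print(chunk.content, end="", flush=True)
```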
|
||||
|
@ -190,9 +190,6 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
|
||||
class ChatLiteLLM(BaseChatModel):
|
||||
"""`LiteLLM` Chat models API.
|
||||
|
||||
To use you must have the google.generativeai Python package installed and
|
||||
either:
|
||||
|
||||
1. The ``GOOGLE_API_KEY``` environment variable set with your API key, or
|
||||
2. Pass your API key using the google_api_key kwarg to the ChatGoogle
|
||||
constructor.
|
||||
@ -206,7 +203,8 @@ class ChatLiteLLM(BaseChatModel):
|
||||
"""
|
||||
|
||||
client: Any #: :meta private:
|
||||
model_name: str = "gpt-3.5-turbo"
|
||||
model: str = "gpt-3.5-turbo"
|
||||
model_name: Optional[str] = None
|
||||
"""Model name to use."""
|
||||
openai_api_key: Optional[str] = None
|
||||
azure_api_key: Optional[str] = None
|
||||
@ -217,8 +215,9 @@ class ChatLiteLLM(BaseChatModel):
|
||||
streaming: bool = False
|
||||
api_base: Optional[str] = None
|
||||
organization: Optional[str] = None
|
||||
custom_llm_provider: Optional[str] = None
|
||||
request_timeout: Optional[Union[float, Tuple[float, float]]] = None
|
||||
temperature: Optional[float] = None
|
||||
temperature: Optional[float] = 1
|
||||
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
|
||||
"""Run inference with this temperature. Must by in the closed
|
||||
interval [0.0, 1.0]."""
|
||||
@ -238,8 +237,11 @@ class ChatLiteLLM(BaseChatModel):
|
||||
@property
|
||||
def _default_params(self) -> Dict[str, Any]:
|
||||
"""Get the default parameters for calling OpenAI API."""
|
||||
set_model_value = self.model
|
||||
if self.model_name is not None:
|
||||
set_model_value = self.model_name
|
||||
return {
|
||||
"model": self.model_name,
|
||||
"model": set_model_value,
|
||||
"force_timeout": self.request_timeout,
|
||||
"max_tokens": self.max_tokens,
|
||||
"stream": self.streaming,
|
||||
@ -251,10 +253,13 @@ class ChatLiteLLM(BaseChatModel):
|
||||
@property
|
||||
def _client_params(self) -> Dict[str, Any]:
|
||||
"""Get the parameters used for the openai client."""
|
||||
set_model_value = self.model
|
||||
if self.model_name is not None:
|
||||
set_model_value = self.model_name
|
||||
self.client.api_base = self.api_base
|
||||
self.client.organization = self.organization
|
||||
creds: Dict[str, Any] = {
|
||||
"model": self.model_name,
|
||||
"model": set_model_value,
|
||||
"force_timeout": self.request_timeout,
|
||||
}
|
||||
return {**self._default_params, **creds}
|
||||
@ -347,7 +352,10 @@ class ChatLiteLLM(BaseChatModel):
|
||||
)
|
||||
generations.append(gen)
|
||||
token_usage = response.get("usage", {})
|
||||
llm_output = {"token_usage": token_usage, "model_name": self.model_name}
|
||||
set_model_value = self.model
|
||||
if self.model_name is not None:
|
||||
set_model_value = self.model_name
|
||||
llm_output = {"token_usage": token_usage, "model": set_model_value}
|
||||
return ChatResult(generations=generations, llm_output=llm_output)
|
||||
|
||||
def _create_message_dicts(
|
||||
@ -437,8 +445,11 @@ class ChatLiteLLM(BaseChatModel):
|
||||
@property
|
||||
def _identifying_params(self) -> Dict[str, Any]:
|
||||
"""Get the identifying parameters."""
|
||||
set_model_value = self.model
|
||||
if self.model_name is not None:
|
||||
set_model_value = self.model_name
|
||||
return {
|
||||
"model_name": self.model_name,
|
||||
"model": set_model_value,
|
||||
"temperature": self.temperature,
|
||||
"top_p": self.top_p,
|
||||
"top_k": self.top_k,
|
||||
|
@ -132,6 +132,7 @@ from langchain.document_loaders.pdf import (
|
||||
PyPDFLoader,
|
||||
UnstructuredPDFLoader,
|
||||
)
|
||||
from langchain.document_loaders.polars_dataframe import PolarsDataFrameLoader
|
||||
from langchain.document_loaders.powerpoint import UnstructuredPowerPointLoader
|
||||
from langchain.document_loaders.psychic import PsychicLoader
|
||||
from langchain.document_loaders.pubmed import PubMedLoader
|
||||
@ -299,6 +300,7 @@ __all__ = [
|
||||
"PDFPlumberLoader",
|
||||
"PagedPDFSplitter",
|
||||
"PlaywrightURLLoader",
|
||||
"PolarsDataFrameLoader",
|
||||
"PsychicLoader",
|
||||
"PubMedLoader",
|
||||
"PyMuPDFLoader",
|
||||
|
@ -20,16 +20,14 @@ logger = logging.getLogger(__name__)
|
||||
class ContentFormat(str, Enum):
|
||||
"""Enumerator of the content formats of Confluence page."""
|
||||
|
||||
EDITOR = "body.editor"
|
||||
EXPORT_VIEW = "body.export_view"
|
||||
ANONYMOUS_EXPORT_VIEW = "body.anonymous_export_view"
|
||||
STORAGE = "body.storage"
|
||||
VIEW = "body.view"
|
||||
|
||||
def get_content(self, page: dict) -> str:
|
||||
if self == ContentFormat.STORAGE:
|
||||
return page["body"]["storage"]["value"]
|
||||
elif self == ContentFormat.VIEW:
|
||||
return page["body"]["view"]["value"]
|
||||
|
||||
raise ValueError("unknown content format")
|
||||
return page["body"][self.name.lower()]["value"]
|
||||
|
||||
|
||||
class ConfluenceLoader(BaseLoader):
|
||||
@ -52,7 +50,10 @@ class ConfluenceLoader(BaseLoader):
|
||||
raw XML representation for storage. The view format is the HTML representation for
|
||||
viewing, with macros rendered as though viewed by a user. You can pass
|
||||
an enum `content_format` argument to `load()` to specify the content format; this is
|
||||
set to `ContentFormat.STORAGE` by default.
|
||||
set to `ContentFormat.STORAGE` by default, the supported values are:
|
||||
`ContentFormat.EDITOR`, `ContentFormat.EXPORT_VIEW`,
|
||||
`ContentFormat.ANONYMOUS_EXPORT_VIEW`, `ContentFormat.STORAGE`,
|
||||
and `ContentFormat.VIEW`.
|
||||
|
||||
Hint: space_key and page_id can both be found in the URL of a page in Confluence
|
||||
- https://yoursite.atlassian.com/wiki/spaces/<space_key>/pages/<page_id>
|
||||
@ -238,7 +239,11 @@ class ConfluenceLoader(BaseLoader):
|
||||
:type include_attachments: bool, optional
|
||||
:param include_comments: defaults to False
|
||||
:type include_comments: bool, optional
|
||||
:param content_format: Specify content format, defaults to ContentFormat.STORAGE
|
||||
:param content_format: Specify content format, defaults to
|
||||
ContentFormat.STORAGE, the supported values are:
|
||||
`ContentFormat.EDITOR`, `ContentFormat.EXPORT_VIEW`,
|
||||
`ContentFormat.ANONYMOUS_EXPORT_VIEW`,
|
||||
`ContentFormat.STORAGE`, and `ContentFormat.VIEW`.
|
||||
:type content_format: ContentFormat
|
||||
:param limit: Maximum number of pages to retrieve per request, defaults to 50
|
||||
:type limit: int, optional
|
||||
@ -473,14 +478,12 @@ class ConfluenceLoader(BaseLoader):
|
||||
else:
|
||||
attachment_texts = []
|
||||
|
||||
content = content_format.get_content(page)
|
||||
if keep_markdown_format:
|
||||
# Use markdownify to keep the page Markdown style
|
||||
text = markdownify(
|
||||
page["body"]["storage"]["value"], heading_style="ATX"
|
||||
) + "".join(attachment_texts)
|
||||
text = markdownify(content, heading_style="ATX") + "".join(attachment_texts)
|
||||
|
||||
else:
|
||||
content = content_format.get_content(page)
|
||||
if keep_newlines:
|
||||
text = BeautifulSoup(
|
||||
content.replace("</p>", "\n</p>").replace("<br />", "\n"), "lxml"
|
||||
|
@ -4,23 +4,15 @@ from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
|
||||
|
||||
class DataFrameLoader(BaseLoader):
|
||||
"""Load `Pandas` DataFrame."""
|
||||
|
||||
def __init__(self, data_frame: Any, page_content_column: str = "text"):
|
||||
class BaseDataFrameLoader(BaseLoader):
|
||||
def __init__(self, data_frame: Any, *, page_content_column: str = "text"):
|
||||
"""Initialize with dataframe object.
|
||||
|
||||
Args:
|
||||
data_frame: Pandas DataFrame object.
|
||||
data_frame: DataFrame object.
|
||||
page_content_column: Name of the column containing the page content.
|
||||
Defaults to "text".
|
||||
"""
|
||||
import pandas as pd
|
||||
|
||||
if not isinstance(data_frame, pd.DataFrame):
|
||||
raise ValueError(
|
||||
f"Expected data_frame to be a pd.DataFrame, got {type(data_frame)}"
|
||||
)
|
||||
self.data_frame = data_frame
|
||||
self.page_content_column = page_content_column
|
||||
|
||||
@ -36,3 +28,28 @@ class DataFrameLoader(BaseLoader):
|
||||
def load(self) -> List[Document]:
|
||||
"""Load full dataframe."""
|
||||
return list(self.lazy_load())
|
||||
|
||||
|
||||
class DataFrameLoader(BaseDataFrameLoader):
|
||||
"""Load `Pandas` DataFrame."""
|
||||
|
||||
def __init__(self, data_frame: Any, page_content_column: str = "text"):
|
||||
"""Initialize with dataframe object.
|
||||
|
||||
Args:
|
||||
data_frame: Pandas DataFrame object.
|
||||
page_content_column: Name of the column containing the page content.
|
||||
Defaults to "text".
|
||||
"""
|
||||
try:
|
||||
import pandas as pd
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Unable to import pandas, please install with `pip install pandas`."
|
||||
) from e
|
||||
|
||||
if not isinstance(data_frame, pd.DataFrame):
|
||||
raise ValueError(
|
||||
f"Expected data_frame to be a pd.DataFrame, got {type(data_frame)}"
|
||||
)
|
||||
super().__init__(data_frame, page_content_column=page_content_column)
|
||||
|
@ -0,0 +1,32 @@
|
||||
from typing import Any, Iterator
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.dataframe import BaseDataFrameLoader
|
||||
|
||||
|
||||
class PolarsDataFrameLoader(BaseDataFrameLoader):
|
||||
"""Load `Polars` DataFrame."""
|
||||
|
||||
def __init__(self, data_frame: Any, *, page_content_column: str = "text"):
|
||||
"""Initialize with dataframe object.
|
||||
|
||||
Args:
|
||||
data_frame: Polars DataFrame object.
|
||||
page_content_column: Name of the column containing the page content.
|
||||
Defaults to "text".
|
||||
"""
|
||||
import polars as pl
|
||||
|
||||
if not isinstance(data_frame, pl.DataFrame):
|
||||
raise ValueError(
|
||||
f"Expected data_frame to be a pl.DataFrame, got {type(data_frame)}"
|
||||
)
|
||||
super().__init__(data_frame, page_content_column=page_content_column)
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
"""Lazy load records from dataframe."""
|
||||
|
||||
for row in self.data_frame.iter_rows(named=True):
|
||||
text = row[self.page_content_column]
|
||||
row.pop(self.page_content_column)
|
||||
yield Document(page_content=text, metadata=row)
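A short usage sketch for the new loader; the column names are arbitrary examples.

```python
import polars as pl

from langchain.document_loaders import PolarsDataFrameLoader

df = pl.DataFrame({"text": ["hello", "world"], "source": ["a.txt", "b.txt"]})
loader = PolarsDataFrameLoader(df, page_content_column="text")
docs = loader.load()
# Each row becomes a Document: page_content comes from "text",
# the remaining columns end up in metadata.
```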
|
@ -1,10 +1,9 @@
|
||||
from typing import Any, Iterator, List
|
||||
from typing import Any
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.base import BaseLoader
|
||||
from langchain.document_loaders.dataframe import BaseDataFrameLoader
|
||||
|
||||
|
||||
class XorbitsLoader(BaseLoader):
|
||||
class XorbitsLoader(BaseDataFrameLoader):
|
||||
"""Load `Xorbits` DataFrame."""
|
||||
|
||||
def __init__(self, data_frame: Any, page_content_column: str = "text"):
|
||||
@ -30,17 +29,4 @@ class XorbitsLoader(BaseLoader):
|
||||
f"Expected data_frame to be a xorbits.pandas.DataFrame, \
|
||||
got {type(data_frame)}"
|
||||
)
|
||||
self.data_frame = data_frame
|
||||
self.page_content_column = page_content_column
|
||||
|
||||
def lazy_load(self) -> Iterator[Document]:
|
||||
"""Lazy load records from dataframe."""
|
||||
for _, row in self.data_frame.iterrows():
|
||||
text = row[self.page_content_column]
|
||||
metadata = row.to_dict()
|
||||
metadata.pop(self.page_content_column)
|
||||
yield Document(page_content=text, metadata=metadata)
|
||||
|
||||
def load(self) -> List[Document]:
|
||||
"""Load full dataframe."""
|
||||
return list(self.lazy_load())
|
||||
super().__init__(data_frame, page_content_column=page_content_column)
|
||||
|
@ -352,9 +352,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
|
||||
|
||||
if self.show_progress_bar:
|
||||
try:
|
||||
import tqdm
|
||||
from tqdm.auto import tqdm
|
||||
|
||||
_iter = tqdm.tqdm(range(0, len(tokens), _chunk_size))
|
||||
_iter = tqdm(range(0, len(tokens), _chunk_size))
|
||||
except ImportError:
|
||||
_iter = range(0, len(tokens), _chunk_size)
|
||||
else:
|
||||
|
@ -66,6 +66,10 @@ class ArangoGraph:
|
||||
col_type: str = collection["type"]
|
||||
col_size: int = self.db.collection(col_name).count()
|
||||
|
||||
# Skip collection if empty
|
||||
if col_size == 0:
|
||||
continue
|
||||
|
||||
# Set number of ArangoDB documents/edges to retrieve
|
||||
limit_amount = ceil(sample_ratio * col_size) or 1
|
||||
|
||||
|
@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from langchain.load.dump import dumps
|
||||
from langchain.load.load import loads
|
||||
from langchain.utils import get_from_env
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from langchainhub import Client
|
||||
@ -20,9 +19,7 @@ def _get_client(api_url: Optional[str] = None, api_key: Optional[str] = None) ->
|
||||
"langchainhub`."
|
||||
) from e
|
||||
|
||||
api_url = api_url or get_from_env("api_url", "LANGCHAIN_HUB_API_URL")
|
||||
api_key = api_key or get_from_env("api_key", "LANGCHAIN_HUB_API_KEY", default="")
|
||||
api_key = api_key or get_from_env("api_key", "LANGCHAIN_API_KEY")
|
||||
# Client logic will also attempt to load URL/key from environment variables
|
||||
return Client(api_url, api_key=api_key)
|
||||
|
||||
|
||||
@ -33,14 +30,33 @@ def push(
|
||||
api_url: Optional[str] = None,
|
||||
api_key: Optional[str] = None,
|
||||
parent_commit_hash: Optional[str] = "latest",
|
||||
new_repo_is_public: bool = False,
|
||||
new_repo_description: str = "",
|
||||
) -> str:
|
||||
"""
|
||||
Pushes an object to the hub and returns the URL.
|
||||
Pushes an object to the hub and returns the new commit hash.
|
||||
|
||||
:param repo_full_name: The full name of the repo to push to in the format of
|
||||
`owner/repo`.
|
||||
:param object: The LangChain object to serialize and push to the hub.
|
||||
:param api_url: The URL of the LangChain Hub API. Defaults to the hosted API service
|
||||
if you have an api key set, or a localhost instance if not.
|
||||
:param api_key: The API key to use to authenticate with the LangChain Hub API.
|
||||
:param parent_commit_hash: The commit hash of the parent commit to push to. Defaults
|
||||
to the latest commit automatically.
|
||||
:param new_repo_is_public: Whether the repo should be public. Defaults to
|
||||
False (Private by default).
|
||||
:param new_repo_description: The description of the repo. Defaults to an empty
|
||||
string.
|
||||
"""
|
||||
client = _get_client(api_url=api_url, api_key=api_key)
|
||||
manifest_json = dumps(object)
|
||||
resp = client.push(
|
||||
repo_full_name, manifest_json, parent_commit_hash=parent_commit_hash
|
||||
repo_full_name,
|
||||
manifest_json,
|
||||
parent_commit_hash=parent_commit_hash,
|
||||
new_repo_is_public=new_repo_is_public,
|
||||
new_repo_description=new_repo_description,
|
||||
)
|
||||
commit_hash: str = resp["commit"]["commit_hash"]
|
||||
return commit_hash
|
||||
@ -53,7 +69,13 @@ def pull(
|
||||
api_key: Optional[str] = None,
|
||||
) -> Any:
|
||||
"""
|
||||
Pulls an object from the hub and returns it.
|
||||
Pulls an object from the hub and returns it as a LangChain object.
|
||||
|
||||
:param owner_repo_commit: The full name of the repo to pull from in the format of
|
||||
`owner/repo:commit_hash`.
|
||||
:param api_url: The URL of the LangChain Hub API. Defaults to the hosted API service
|
||||
if you have an api key set, or a localhost instance if not.
|
||||
:param api_key: The API key to use to authenticate with the LangChain Hub API.
|
||||
"""
|
||||
client = _get_client(api_url=api_url, api_key=api_key)
|
||||
resp: str = client.pull(owner_repo_commit)
|
||||
|
@ -528,9 +528,13 @@ class BaseLLM(BaseLanguageModel[str], ABC):
|
||||
f" argument of type {type(prompts)}."
|
||||
)
|
||||
# Create callback managers
|
||||
if isinstance(callbacks, list) and (
|
||||
isinstance(callbacks[0], (list, BaseCallbackManager))
|
||||
or callbacks[0] is None
|
||||
if (
|
||||
isinstance(callbacks, list)
|
||||
and callbacks
|
||||
and (
|
||||
isinstance(callbacks[0], (list, BaseCallbackManager))
|
||||
or callbacks[0] is None
|
||||
)
|
||||
):
|
||||
# We've received a list of callbacks args to apply to each input
|
||||
assert len(callbacks) == len(prompts)
|
||||
|
@ -59,6 +59,9 @@ class VLLM(BaseLLM):
|
||||
logprobs: Optional[int] = None
|
||||
"""Number of log probabilities to return per output token."""
|
||||
|
||||
dtype: str = "auto"
|
||||
"""The data type for the model weights and activations."""
|
||||
|
||||
client: Any #: :meta private:
|
||||
|
||||
@root_validator()
|
||||
@ -77,6 +80,7 @@ class VLLM(BaseLLM):
|
||||
model=values["model"],
|
||||
tensor_parallel_size=values["tensor_parallel_size"],
|
||||
trust_remote_code=values["trust_remote_code"],
|
||||
dtype=values["dtype"],
|
||||
)
|
||||
|
||||
return values
|
||||
|
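A hedged sketch of the new `dtype` field on the VLLM wrapper (requires the `vllm` package; the model name is only an example, not prescribed by this change):

```python
from langchain.llms import VLLM

llm = VLLM(
    model="mosaicml/mpt-7b",   # example model
    trust_remote_code=True,
    dtype="half",              # new field; "auto" remains the default
)
print(llm("What is the capital of France?"))
```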
@ -1,6 +1,6 @@
|
||||
from typing import List
|
||||
|
||||
from langchain.pydantic_v1 import BaseModel
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.schema import (
|
||||
BaseChatMessageHistory,
|
||||
)
|
||||
@ -13,7 +13,7 @@ class ChatMessageHistory(BaseChatMessageHistory, BaseModel):
|
||||
Stores messages in an in memory list.
|
||||
"""
|
||||
|
||||
messages: List[BaseMessage] = []
|
||||
messages: List[BaseMessage] = Field(default_factory=list)
|
||||
|
||||
def add_message(self, message: BaseMessage) -> None:
|
||||
"""Add a self-created message to the store"""
|
||||
|
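`Field(default_factory=list)` is the idiomatic pydantic way to declare a per-instance mutable default. A minimal usage sketch of the in-memory history:

```python
from langchain.memory import ChatMessageHistory

history = ChatMessageHistory()     # messages starts as a fresh, empty list
history.add_user_message("hello")
history.add_ai_message("hi there")
print(history.messages)
```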
@ -69,6 +69,13 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
||||
when making API calls. If not provided, credentials will be ascertained from
|
||||
the environment."""
|
||||
|
||||
# TODO: Add extra data type handling for type website
|
||||
engine_data_type: int = Field(default=0, ge=0, le=1)
|
||||
""" Defines the enterprise search data type
|
||||
0 - Unstructured data
|
||||
1 - Structured data
|
||||
"""
|
||||
|
||||
_client: SearchServiceClient
|
||||
_serving_config: str
|
||||
|
||||
@ -86,10 +93,18 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
||||
from google.cloud import discoveryengine_v1beta # noqa: F401
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"google.cloud.discoveryengine is not installed. "
|
||||
"google.cloud.discoveryengine is not installed."
|
||||
"Please install it with pip install google-cloud-discoveryengine"
|
||||
) from exc
|
||||
|
||||
try:
|
||||
from google.api_core.exceptions import InvalidArgument # noqa: F401
|
||||
except ImportError as exc:
|
||||
raise ImportError(
|
||||
"google.api_core.exceptions is not installed. "
|
||||
"Please install it with pip install google-api-core"
|
||||
) from exc
|
||||
|
||||
values["project_id"] = get_from_dict_or_env(values, "project_id", "PROJECT_ID")
|
||||
values["search_engine_id"] = get_from_dict_or_env(
|
||||
values, "search_engine_id", "SEARCH_ENGINE_ID"
|
||||
@ -110,7 +125,7 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
||||
serving_config=self.serving_config_id,
|
||||
)
|
||||
|
||||
def _convert_search_response(
|
||||
def _convert_unstructured_search_response(
|
||||
self, results: Sequence[SearchResult]
|
||||
) -> List[Document]:
|
||||
"""Converts a sequence of search results to a list of LangChain documents."""
|
||||
@ -149,6 +164,30 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
||||
|
||||
return documents
|
||||
|
||||
def _convert_structured_search_response(
|
||||
self, results: Sequence[SearchResult]
|
||||
) -> List[Document]:
|
||||
"""Converts a sequence of search results to a list of LangChain documents."""
|
||||
import json
|
||||
|
||||
from google.protobuf.json_format import MessageToDict
|
||||
|
||||
documents: List[Document] = []
|
||||
|
||||
for result in results:
|
||||
document_dict = MessageToDict(
|
||||
result.document._pb, preserving_proto_field_name=True
|
||||
)
|
||||
|
||||
documents.append(
|
||||
Document(
|
||||
page_content=json.dumps(document_dict.get("struct_data", {})),
|
||||
metadata={"id": document_dict["id"], "name": document_dict["name"]},
|
||||
)
|
||||
)
|
||||
|
||||
return documents
|
||||
|
||||
def _create_search_request(self, query: str) -> SearchRequest:
|
||||
"""Prepares a SearchRequest object."""
|
||||
from google.cloud.discoveryengine_v1beta import SearchRequest
|
||||
@ -161,23 +200,32 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
||||
mode=self.spell_correction_mode
|
||||
)
|
||||
|
||||
if self.get_extractive_answers:
|
||||
extractive_content_spec = (
|
||||
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
|
||||
max_extractive_answer_count=self.max_extractive_answer_count,
|
||||
if self.engine_data_type == 0:
|
||||
if self.get_extractive_answers:
|
||||
extractive_content_spec = (
|
||||
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
|
||||
max_extractive_answer_count=self.max_extractive_answer_count,
|
||||
)
|
||||
)
|
||||
else:
|
||||
extractive_content_spec = (
|
||||
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
|
||||
max_extractive_segment_count=self.max_extractive_segment_count,
|
||||
)
|
||||
)
|
||||
content_search_spec = SearchRequest.ContentSearchSpec(
|
||||
extractive_content_spec=extractive_content_spec
|
||||
)
|
||||
elif self.engine_data_type == 1:
|
||||
content_search_spec = None
|
||||
else:
|
||||
extractive_content_spec = (
|
||||
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
|
||||
max_extractive_segment_count=self.max_extractive_segment_count,
|
||||
)
|
||||
# TODO: Add extra data type handling for type website
|
||||
raise NotImplementedError(
|
||||
"Only engine data type 0 (Unstructured) or 1 (Structured)"
|
||||
+ " are supported currently."
|
||||
+ f" Got {self.engine_data_type}"
|
||||
)
|
||||
|
||||
content_search_spec = SearchRequest.ContentSearchSpec(
|
||||
extractive_content_spec=extractive_content_spec,
|
||||
)
|
||||
|
||||
return SearchRequest(
|
||||
query=query,
|
||||
filter=self.filter,
|
||||
@ -192,8 +240,27 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
|
||||
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
|
||||
) -> List[Document]:
|
||||
"""Get documents relevant for a query."""
|
||||
from google.api_core.exceptions import InvalidArgument
|
||||
|
||||
search_request = self._create_search_request(query)
|
||||
response = self._client.search(search_request)
|
||||
documents = self._convert_search_response(response.results)
|
||||
|
||||
try:
|
||||
response = self._client.search(search_request)
|
||||
except InvalidArgument as e:
|
||||
raise type(e)(
|
||||
e.message + " This might be due to engine_data_type not set correctly."
|
||||
)
|
||||
|
||||
if self.engine_data_type == 0:
|
||||
documents = self._convert_unstructured_search_response(response.results)
|
||||
elif self.engine_data_type == 1:
|
||||
documents = self._convert_structured_search_response(response.results)
|
||||
else:
|
||||
# TODO: Add extra data type handling for type website
|
||||
raise NotImplementedError(
|
||||
"Only engine data type 0 (Unstructured) or 1 (Structured)"
|
||||
+ " are supported currently."
|
||||
+ f" Got {self.engine_data_type}"
|
||||
)
|
||||
|
||||
return documents
|
||||
|
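A hedged sketch of the new `engine_data_type` switch (0 = unstructured, the default; 1 = structured). The project and engine IDs are placeholders for an existing Enterprise Search setup:

```python
from langchain.retrievers.google_cloud_enterprise_search import (
    GoogleCloudEnterpriseSearchRetriever,
)

retriever = GoogleCloudEnterpriseSearchRetriever(
    project_id="my-gcp-project",        # placeholder
    search_engine_id="my-engine-id",    # placeholder
    engine_data_type=1,                 # structured data store
)
docs = retriever.get_relevant_documents("What is the population of Paris?")
```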
@ -97,7 +97,7 @@ class ParentDocumentRetriever(BaseRetriever):
|
||||
def add_documents(
|
||||
self,
|
||||
documents: List[Document],
|
||||
ids: Optional[List[str]],
|
||||
ids: Optional[List[str]] = None,
|
||||
add_to_docstore: bool = True,
|
||||
) -> None:
|
||||
"""Adds documents to the docstore and vectorstores.
|
||||
|
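With `ids` now defaulting to `None`, `add_documents` can be called without explicit ids. A sketch under the assumption that Chroma and an OpenAI key are available; the collection name and document are placeholders:

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.retrievers import ParentDocumentRetriever
from langchain.schema import Document
from langchain.storage import InMemoryStore
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import Chroma

retriever = ParentDocumentRetriever(
    vectorstore=Chroma(
        collection_name="parents", embedding_function=OpenAIEmbeddings()
    ),
    docstore=InMemoryStore(),
    child_splitter=RecursiveCharacterTextSplitter(chunk_size=200),
)
# No TypeError anymore when ids is omitted.
retriever.add_documents([Document(page_content="a long parent document ...")])
```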
@ -17,6 +17,11 @@ tool for the job.
|
||||
CallbackManagerForToolRun, AsyncCallbackManagerForToolRun
|
||||
"""
|
||||
|
||||
from langchain.tools.ainetwork.app import AINAppOps
|
||||
from langchain.tools.ainetwork.owner import AINOwnerOps
|
||||
from langchain.tools.ainetwork.rule import AINRuleOps
|
||||
from langchain.tools.ainetwork.transfer import AINTransfer
|
||||
from langchain.tools.ainetwork.value import AINValueOps
|
||||
from langchain.tools.arxiv.tool import ArxivQueryRun
|
||||
from langchain.tools.azure_cognitive_services import (
|
||||
AzureCogsFormRecognizerTool,
|
||||
@ -118,6 +123,11 @@ from langchain.tools.youtube.search import YouTubeSearchTool
|
||||
from langchain.tools.zapier.tool import ZapierNLAListActions, ZapierNLARunAction
|
||||
|
||||
__all__ = [
|
||||
"AINAppOps",
|
||||
"AINOwnerOps",
|
||||
"AINRuleOps",
|
||||
"AINTransfer",
|
||||
"AINValueOps",
|
||||
"AIPluginTool",
|
||||
"APIOperation",
|
||||
"ArxivQueryRun",
|
||||
|
95
libs/langchain/langchain/tools/ainetwork/app.py
Normal file
95
libs/langchain/langchain/tools/ainetwork/app.py
Normal file
@ -0,0 +1,95 @@
|
||||
import builtins
|
||||
import json
|
||||
from enum import Enum
|
||||
from typing import List, Optional, Type, Union
|
||||
|
||||
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.tools.ainetwork.base import AINBaseTool
|
||||
|
||||
|
||||
class AppOperationType(str, Enum):
|
||||
SET_ADMIN = "SET_ADMIN"
|
||||
GET_CONFIG = "GET_CONFIG"
|
||||
|
||||
|
||||
class AppSchema(BaseModel):
|
||||
type: AppOperationType = Field(...)
|
||||
appName: str = Field(..., description="Name of the application on the blockchain")
|
||||
address: Optional[Union[str, List[str]]] = Field(
|
||||
None,
|
||||
description=(
|
||||
"A single address or a list of addresses. Default: current session's "
|
||||
"address"
|
||||
),
|
||||
)
|
||||
|
||||
|
||||
class AINAppOps(AINBaseTool):
|
||||
name: str = "AINappOps"
|
||||
description: str = """
|
||||
Create an app in the AINetwork Blockchain database by creating the /apps/<appName> path.
|
||||
An address set as `admin` can grant `owner` rights to other addresses (refer to `AINownerOps` for more details).
|
||||
Also, `admin` is initialized to have all `owner` permissions and `rule` allowed for that path.
|
||||
|
||||
## appName Rule
|
||||
- [a-z_0-9]+
|
||||
|
||||
## address Rules
|
||||
- 0x[0-9a-fA-F]{40}
|
||||
- Defaults to the current session's address
|
||||
- Multiple addresses can be specified if needed
|
||||
|
||||
## SET_ADMIN Example 1
|
||||
- type: SET_ADMIN
|
||||
- appName: ain_project
|
||||
|
||||
### Result:
|
||||
1. Path /apps/ain_project created.
|
||||
2. Current session's address registered as admin.
|
||||
|
||||
## SET_ADMIN Example 2
|
||||
- type: SET_ADMIN
|
||||
- appName: test_project
|
||||
- address: [<address1>, <address2>]
|
||||
|
||||
### Result:
|
||||
1. Path /apps/test_project created.
|
||||
2. <address1> and <address2> registered as admin.
|
||||
|
||||
""" # noqa: E501
|
||||
args_schema: Type[BaseModel] = AppSchema
|
||||
|
||||
async def _arun(
|
||||
self,
|
||||
type: AppOperationType,
|
||||
appName: str,
|
||||
address: Optional[Union[str, List[str]]] = None,
|
||||
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
||||
) -> str:
|
||||
from ain.types import ValueOnlyTransactionInput
|
||||
from ain.utils import getTimestamp
|
||||
|
||||
try:
|
||||
if type is AppOperationType.SET_ADMIN:
|
||||
if address is None:
|
||||
address = self.interface.wallet.defaultAccount.address
|
||||
if isinstance(address, str):
|
||||
address = [address]
|
||||
|
||||
res = await self.interface.db.ref(
|
||||
f"/manage_app/{appName}/create/{getTimestamp()}"
|
||||
).setValue(
|
||||
transactionInput=ValueOnlyTransactionInput(
|
||||
value={"admin": {address: True for address in address}}
|
||||
)
|
||||
)
|
||||
elif type is AppOperationType.GET_CONFIG:
|
||||
res = await self.interface.db.ref(
|
||||
f"/manage_app/{appName}/config"
|
||||
).getValue()
|
||||
else:
|
||||
raise ValueError(f"Unsupported 'type': {type}.")
|
||||
return json.dumps(res, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return f"{builtins.type(e).__name__}: {str(e)}"
|
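A hedged sketch of driving the new AINetwork tools through an agent, mirroring the integration test further down in this diff. It assumes `pip install ain-py`, an OpenAI key, and `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` in the environment so the default `authenticate()` factory can build the interface:

```python
from langchain.agents import AgentType, initialize_agent
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
from langchain.chat_models import ChatOpenAI

toolkit = AINetworkToolkit(network="testnet")
agent = initialize_agent(
    tools=toolkit.get_tools(),
    llm=ChatOpenAI(model="gpt-4", temperature=0),
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
agent.run("Create app my_langchain_app")  # routed to AINappOps
```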
71
libs/langchain/langchain/tools/ainetwork/base.py
Normal file
71
libs/langchain/langchain/tools/ainetwork/base.py
Normal file
@ -0,0 +1,71 @@
|
||||
"""Base class for AINetwork tools."""
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import threading
|
||||
from enum import Enum
|
||||
from typing import TYPE_CHECKING, Any, Optional
|
||||
|
||||
from langchain.callbacks.manager import CallbackManagerForToolRun
|
||||
from langchain.pydantic_v1 import Field
|
||||
from langchain.tools.ainetwork.utils import authenticate
|
||||
from langchain.tools.base import BaseTool
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ain.ain import Ain
|
||||
|
||||
|
||||
class OperationType(str, Enum):
|
||||
SET = "SET"
|
||||
GET = "GET"
|
||||
|
||||
|
||||
class AINBaseTool(BaseTool):
|
||||
"""Base class for the AINetwork tools."""
|
||||
|
||||
interface: Ain = Field(default_factory=authenticate)
|
||||
"""The interface object for the AINetwork Blockchain."""
|
||||
|
||||
def _run(
|
||||
self,
|
||||
*args: Any,
|
||||
run_manager: Optional[CallbackManagerForToolRun] = None,
|
||||
**kwargs: Any,
|
||||
) -> str:
|
||||
try:
|
||||
loop = asyncio.get_event_loop()
|
||||
except RuntimeError:
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
if loop.is_closed():
|
||||
loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(loop)
|
||||
|
||||
if loop.is_running():
|
||||
result_container = []
|
||||
|
||||
def thread_target() -> None:
|
||||
nonlocal result_container
|
||||
new_loop = asyncio.new_event_loop()
|
||||
asyncio.set_event_loop(new_loop)
|
||||
try:
|
||||
result_container.append(
|
||||
new_loop.run_until_complete(self._arun(*args, **kwargs))
|
||||
)
|
||||
except Exception as e:
|
||||
result_container.append(e)
|
||||
finally:
|
||||
new_loop.close()
|
||||
|
||||
thread = threading.Thread(target=thread_target)
|
||||
thread.start()
|
||||
thread.join()
|
||||
result = result_container[0]
|
||||
if isinstance(result, Exception):
|
||||
raise result
|
||||
return result
|
||||
|
||||
else:
|
||||
result = loop.run_until_complete(self._arun(*args, **kwargs))
|
||||
loop.close()
|
||||
return result
|
110
libs/langchain/langchain/tools/ainetwork/owner.py
Normal file
110
libs/langchain/langchain/tools/ainetwork/owner.py
Normal file
@ -0,0 +1,110 @@
|
||||
import builtins
|
||||
import json
|
||||
from typing import List, Optional, Type, Union
|
||||
|
||||
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.tools.ainetwork.base import AINBaseTool, OperationType
|
||||
|
||||
|
||||
class RuleSchema(BaseModel):
|
||||
type: OperationType = Field(...)
|
||||
path: str = Field(..., description="Blockchain reference path")
|
||||
address: Optional[Union[str, List[str]]] = Field(
|
||||
None, description="A single address or a list of addresses"
|
||||
)
|
||||
write_owner: Optional[bool] = Field(
|
||||
False, description="Authority to edit the `owner` property of the path"
|
||||
)
|
||||
write_rule: Optional[bool] = Field(
|
||||
False, description="Authority to edit `write rule` for the path"
|
||||
)
|
||||
write_function: Optional[bool] = Field(
|
||||
False, description="Authority to `set function` for the path"
|
||||
)
|
||||
branch_owner: Optional[bool] = Field(
|
||||
False, description="Authority to initialize `owner` of sub-paths"
|
||||
)
|
||||
|
||||
|
||||
class AINOwnerOps(AINBaseTool):
|
||||
name: str = "AINownerOps"
|
||||
description: str = """
|
||||
Rules for `owner` in AINetwork Blockchain database.
|
||||
An address set as `owner` can modify permissions according to its granted authorities
|
||||
|
||||
## Path Rule
|
||||
- (/[a-zA-Z_0-9]+)+
|
||||
- Permission checks ascend from the most specific (child) path to broader (parent) paths until an `owner` is located.
|
||||
|
||||
## Address Rules
|
||||
- 0x[0-9a-fA-F]{40}: 40-digit hexadecimal address
|
||||
- *: All addresses permitted
|
||||
- Defaults to the current session's address
|
||||
|
||||
## SET
|
||||
- `SET` alters permissions for specific addresses, while other addresses remain unaffected.
|
||||
- When removing an address of `owner`, set all authorities for that address to false.
|
||||
- message `write_owner permission evaluated false` if fail
|
||||
|
||||
### Example
|
||||
- type: SET
|
||||
- path: /apps/langchain
|
||||
- address: [<address 1>, <address 2>]
|
||||
- write_owner: True
|
||||
- write_rule: True
|
||||
- write_function: True
|
||||
- branch_owner: True
|
||||
|
||||
## GET
|
||||
- Provides all addresses with `owner` permissions and their authorities in the path.
|
||||
|
||||
### Example
|
||||
- type: GET
|
||||
- path: /apps/langchain
|
||||
""" # noqa: E501
|
||||
args_schema: Type[BaseModel] = RuleSchema
|
||||
|
||||
async def _arun(
|
||||
self,
|
||||
type: OperationType,
|
||||
path: str,
|
||||
address: Optional[Union[str, List[str]]] = None,
|
||||
write_owner: Optional[bool] = None,
|
||||
write_rule: Optional[bool] = None,
|
||||
write_function: Optional[bool] = None,
|
||||
branch_owner: Optional[bool] = None,
|
||||
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
||||
) -> str:
|
||||
from ain.types import ValueOnlyTransactionInput
|
||||
|
||||
try:
|
||||
if type is OperationType.SET:
|
||||
if address is None:
|
||||
address = self.interface.wallet.defaultAccount.address
|
||||
if isinstance(address, str):
|
||||
address = [address]
|
||||
res = await self.interface.db.ref(path).setOwner(
|
||||
transactionInput=ValueOnlyTransactionInput(
|
||||
value={
|
||||
".owner": {
|
||||
"owners": {
|
||||
address: {
|
||||
"write_owner": write_owner or False,
|
||||
"write_rule": write_rule or False,
|
||||
"write_function": write_function or False,
|
||||
"branch_owner": branch_owner or False,
|
||||
}
|
||||
for address in address
|
||||
}
|
||||
}
|
||||
}
|
||||
)
|
||||
)
|
||||
elif type is OperationType.GET:
|
||||
res = await self.interface.db.ref(path).getOwner()
|
||||
else:
|
||||
raise ValueError(f"Unsupported 'type': {type}.")
|
||||
return json.dumps(res, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return f"{builtins.type(e).__name__}: {str(e)}"
|
77
libs/langchain/langchain/tools/ainetwork/rule.py
Normal file
77
libs/langchain/langchain/tools/ainetwork/rule.py
Normal file
@ -0,0 +1,77 @@
|
||||
import builtins
|
||||
import json
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.tools.ainetwork.base import AINBaseTool, OperationType
|
||||
|
||||
|
||||
class RuleSchema(BaseModel):
|
||||
type: OperationType = Field(...)
|
||||
path: str = Field(..., description="Path on the blockchain where the rule applies")
|
||||
eval: Optional[str] = Field(None, description="eval string to determine permission")
|
||||
|
||||
|
||||
class AINRuleOps(AINBaseTool):
|
||||
name: str = "AINruleOps"
|
||||
description: str = """
|
||||
Covers the write `rule` for the AINetwork Blockchain database. The SET type specifies write permissions using the `eval` variable as a JavaScript eval string.
|
||||
In order to AINvalueOps with SET at the path, the execution result of the `eval` string must be true.
|
||||
|
||||
## Path Rules
|
||||
1. Allowed characters for directory: `[a-zA-Z_0-9]`
|
||||
2. Use `$<key>` for template variables as directory.
|
||||
|
||||
## Eval String Special Variables
|
||||
- auth.addr: Address of the writer for the path
|
||||
- newData: New data for the path
|
||||
- data: Current data for the path
|
||||
- currentTime: Time in seconds
|
||||
- lastBlockNumber: Latest processed block number
|
||||
|
||||
## Eval String Functions
|
||||
- getValue(<path>)
|
||||
- getRule(<path>)
|
||||
- getOwner(<path>)
|
||||
- getFunction(<path>)
|
||||
- evalRule(<path>, <value to set>, auth, currentTime)
|
||||
- evalOwner(<path>, 'write_owner', auth)
|
||||
|
||||
## SET Example
|
||||
- type: SET
|
||||
- path: /apps/langchain_project_1/$from/$to/$img
|
||||
- eval: auth.addr===$from&&!getValue('/apps/image_db/'+$img)
|
||||
|
||||
## GET Example
|
||||
- type: GET
|
||||
- path: /apps/langchain_project_1
|
||||
""" # noqa: E501
|
||||
args_schema: Type[BaseModel] = RuleSchema
|
||||
|
||||
async def _arun(
|
||||
self,
|
||||
type: OperationType,
|
||||
path: str,
|
||||
eval: Optional[str] = None,
|
||||
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
||||
) -> str:
|
||||
from ain.types import ValueOnlyTransactionInput
|
||||
|
||||
try:
|
||||
if type is OperationType.SET:
|
||||
if eval is None:
|
||||
raise ValueError("'eval' is required for SET operation.")
|
||||
|
||||
res = await self.interface.db.ref(path).setRule(
|
||||
transactionInput=ValueOnlyTransactionInput(
|
||||
value={".rule": {"write": eval}}
|
||||
)
|
||||
)
|
||||
elif type is OperationType.GET:
|
||||
res = await self.interface.db.ref(path).getRule()
|
||||
else:
|
||||
raise ValueError(f"Unsupported 'type': {type}.")
|
||||
return json.dumps(res, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return f"{builtins.type(e).__name__}: {str(e)}"
|
29
libs/langchain/langchain/tools/ainetwork/transfer.py
Normal file
29
libs/langchain/langchain/tools/ainetwork/transfer.py
Normal file
@ -0,0 +1,29 @@
|
||||
import json
|
||||
from typing import Optional, Type
|
||||
|
||||
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.tools.ainetwork.base import AINBaseTool
|
||||
|
||||
|
||||
class TransferSchema(BaseModel):
|
||||
address: str = Field(..., description="Address to transfer AIN to")
|
||||
amount: int = Field(..., description="Amount of AIN to transfer")
|
||||
|
||||
|
||||
class AINTransfer(AINBaseTool):
|
||||
name: str = "AINtransfer"
|
||||
description: str = "Transfers AIN to a specified address"
|
||||
args_schema: Type[TransferSchema] = TransferSchema
|
||||
|
||||
async def _arun(
|
||||
self,
|
||||
address: str,
|
||||
amount: int,
|
||||
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
||||
) -> str:
|
||||
try:
|
||||
res = await self.interface.wallet.transfer(address, amount, nonce=-1)
|
||||
return json.dumps(res, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return f"{type(e).__name__}: {str(e)}"
|
62
libs/langchain/langchain/tools/ainetwork/utils.py
Normal file
62
libs/langchain/langchain/tools/ainetwork/utils.py
Normal file
@ -0,0 +1,62 @@
|
||||
"""AINetwork Blockchain tool utils."""
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from typing import TYPE_CHECKING, Literal, Optional
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from ain.ain import Ain
|
||||
|
||||
|
||||
def authenticate(network: Optional[Literal["mainnet", "testnet"]] = "testnet") -> Ain:
|
||||
"""Authenticate using the AIN Blockchain"""
|
||||
|
||||
try:
|
||||
from ain.ain import Ain
|
||||
except ImportError as e:
|
||||
raise ImportError(
|
||||
"Cannot import ain-py related modules. Please install the package with "
|
||||
"`pip install ain-py`."
|
||||
) from e
|
||||
|
||||
if network == "mainnet":
|
||||
provider_url = "https://mainnet-api.ainetwork.ai/"
|
||||
chain_id = 1
|
||||
if "AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY" in os.environ:
|
||||
private_key = os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"]
|
||||
else:
|
||||
raise EnvironmentError(
|
||||
"Error: The AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY environmental variable "
|
||||
"has not been set."
|
||||
)
|
||||
elif network == "testnet":
|
||||
provider_url = "https://testnet-api.ainetwork.ai/"
|
||||
chain_id = 0
|
||||
if "AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY" in os.environ:
|
||||
private_key = os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"]
|
||||
else:
|
||||
raise EnvironmentError(
|
||||
"Error: The AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY environmental variable "
|
||||
"has not been set."
|
||||
)
|
||||
elif network is None:
|
||||
if (
|
||||
"AIN_BLOCKCHAIN_PROVIDER_URL" in os.environ
|
||||
and "AIN_BLOCKCHAIN_CHAIN_ID" in os.environ
|
||||
and "AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY" in os.environ
|
||||
):
|
||||
provider_url = os.environ["AIN_BLOCKCHAIN_PROVIDER_URL"]
|
||||
chain_id = int(os.environ["AIN_BLOCKCHAIN_CHAIN_ID"])
|
||||
private_key = os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"]
|
||||
else:
|
||||
raise EnvironmentError(
|
||||
"Error: The AIN_BLOCKCHAIN_PROVIDER_URL and "
|
||||
"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY and AIN_BLOCKCHAIN_CHAIN_ID "
|
||||
"environmental variable has not been set."
|
||||
)
|
||||
else:
|
||||
raise ValueError(f"Unsupported 'network': {network}")
|
||||
|
||||
ain = Ain(provider_url, chain_id)
|
||||
ain.wallet.addAndSetDefaultAccount(private_key)
|
||||
return ain
|
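A minimal sketch of the environment contract encoded above; the private key value is a placeholder:

```python
import os

from langchain.tools.ainetwork.utils import authenticate

os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"] = "<private key>"  # placeholder
ain = authenticate(network="testnet")  # testnet-api.ainetwork.ai, chain_id 0
print(ain.wallet.defaultAccount.address)
```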
80
libs/langchain/langchain/tools/ainetwork/value.py
Normal file
80
libs/langchain/langchain/tools/ainetwork/value.py
Normal file
@ -0,0 +1,80 @@
|
||||
import builtins
|
||||
import json
|
||||
from typing import Optional, Type, Union
|
||||
|
||||
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
|
||||
from langchain.pydantic_v1 import BaseModel, Field
|
||||
from langchain.tools.ainetwork.base import AINBaseTool, OperationType
|
||||
|
||||
|
||||
class ValueSchema(BaseModel):
|
||||
type: OperationType = Field(...)
|
||||
path: str = Field(..., description="Blockchain reference path")
|
||||
value: Optional[Union[int, str, float, dict]] = Field(
|
||||
None, description="Value to be set at the path"
|
||||
)
|
||||
|
||||
|
||||
class AINValueOps(AINBaseTool):
|
||||
name: str = "AINvalueOps"
|
||||
description: str = """
|
||||
Covers the read and write value for the AINetwork Blockchain database.
|
||||
|
||||
## SET
|
||||
- Set a value at a given path
|
||||
|
||||
### Example
|
||||
- type: SET
|
||||
- path: /apps/langchain_test_1/object
|
||||
- value: {1: 2, "34": 56}
|
||||
|
||||
## GET
|
||||
- Retrieve a value at a given path
|
||||
|
||||
### Example
|
||||
- type: GET
|
||||
- path: /apps/langchain_test_1/DB
|
||||
|
||||
## Special paths
|
||||
- `/accounts/<address>/balance`: Account balance
|
||||
- `/accounts/<address>/nonce`: Account nonce
|
||||
- `/apps`: Applications
|
||||
- `/consensus`: Consensus
|
||||
- `/checkin`: Check-in
|
||||
- `/deposit/<service id>/<address>/<deposit id>`: Deposit
|
||||
- `/deposit_accounts/<service id>/<address>/<account id>`: Deposit accounts
|
||||
- `/escrow`: Escrow
|
||||
- `/payments`: Payment
|
||||
- `/sharding`: Sharding
|
||||
- `/token/name`: Token name
|
||||
- `/token/symbol`: Token symbol
|
||||
- `/token/total_supply`: Token total supply
|
||||
- `/transfer/<address from>/<address to>/<key>/value`: Transfer
|
||||
- `/withdraw/<service id>/<address>/<withdraw id>`: Withdraw
|
||||
"""
|
||||
args_schema: Type[BaseModel] = ValueSchema
|
||||
|
||||
async def _arun(
|
||||
self,
|
||||
type: OperationType,
|
||||
path: str,
|
||||
value: Optional[Union[int, str, float, dict]] = None,
|
||||
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
|
||||
) -> str:
|
||||
from ain.types import ValueOnlyTransactionInput
|
||||
|
||||
try:
|
||||
if type is OperationType.SET:
|
||||
if value is None:
|
||||
raise ValueError("'value' is required for SET operation.")
|
||||
|
||||
res = await self.interface.db.ref(path).setValue(
|
||||
transactionInput=ValueOnlyTransactionInput(value=value)
|
||||
)
|
||||
elif type is OperationType.GET:
|
||||
res = await self.interface.db.ref(path).getValue()
|
||||
else:
|
||||
raise ValueError(f"Unsupported 'type': {type}.")
|
||||
return json.dumps(res, ensure_ascii=False)
|
||||
except Exception as e:
|
||||
return f"{builtins.type(e).__name__}: {str(e)}"
|
@ -6,13 +6,13 @@ import re
|
||||
import sys
|
||||
from contextlib import redirect_stdout
|
||||
from io import StringIO
|
||||
from typing import Any, Dict, Optional
|
||||
from typing import Any, Dict, Optional, Type
|
||||
|
||||
from langchain.callbacks.manager import (
|
||||
AsyncCallbackManagerForToolRun,
|
||||
CallbackManagerForToolRun,
|
||||
)
|
||||
from langchain.pydantic_v1 import Field, root_validator
|
||||
from langchain.pydantic_v1 import BaseModel, Field, root_validator
|
||||
from langchain.tools.base import BaseTool
|
||||
from langchain.utilities import PythonREPL
|
||||
|
||||
@ -77,6 +77,10 @@ class PythonREPLTool(BaseTool):
|
||||
return result
|
||||
|
||||
|
||||
class PythonInputs(BaseModel):
|
||||
query: str = Field(description="code snippet to run")
|
||||
|
||||
|
||||
class PythonAstREPLTool(BaseTool):
|
||||
"""A tool for running python code in a REPL."""
|
||||
|
||||
@ -90,6 +94,7 @@ class PythonAstREPLTool(BaseTool):
|
||||
globals: Optional[Dict] = Field(default_factory=dict)
|
||||
locals: Optional[Dict] = Field(default_factory=dict)
|
||||
sanitize_input: bool = True
|
||||
args_schema: Type[BaseModel] = PythonInputs
|
||||
|
||||
@root_validator(pre=True)
|
||||
def validate_python_version(cls, values: Dict) -> Dict:
|
||||
|
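With `args_schema` set to `PythonInputs`, tool input is validated as a single `query` string. A small usage sketch:

```python
from langchain.tools.python.tool import PythonAstREPLTool

tool = PythonAstREPLTool()
print(tool.run({"query": "sum(range(10))"}))  # prints 45
```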
@ -1,10 +1,12 @@
|
||||
from collections import deque
|
||||
from itertools import islice
|
||||
from typing import (
|
||||
Any,
|
||||
ContextManager,
|
||||
Deque,
|
||||
Generator,
|
||||
Generic,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
@ -161,3 +163,13 @@ class Tee(Generic[T]):
|
||||
|
||||
# Why this is needed https://stackoverflow.com/a/44638570
|
||||
safetee = Tee
|
||||
|
||||
|
||||
def batch_iterate(size: int, iterable: Iterable[T]) -> Iterator[List[T]]:
|
||||
"""Utility batching function."""
|
||||
it = iter(iterable)
|
||||
while True:
|
||||
chunk = list(islice(it, size))
|
||||
if not chunk:
|
||||
return
|
||||
yield chunk
|
||||
|
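Usage sketch of the new helper, matching the unit test added below: it lazily yields fixed-size chunks from any iterable.

```python
from langchain.utils.iter import batch_iterate

for chunk in batch_iterate(2, [1, 2, 3, 4, 5]):
    print(chunk)  # [1, 2] then [3, 4] then [5]
```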
@ -21,7 +21,7 @@ def _create_client(
|
||||
try:
|
||||
import meilisearch
|
||||
except ImportError:
|
||||
raise ValueError(
|
||||
raise ImportError(
|
||||
"Could not import meilisearch python package. "
|
||||
"Please install it with `pip install meilisearch`."
|
||||
)
|
||||
|
@ -52,6 +52,9 @@ class Milvus(VectorStore):
|
||||
default of index.
|
||||
drop_old (Optional[bool]): Whether to drop the current collection. Defaults
|
||||
to False.
|
||||
primary_field (str): Name of the primary key field. Defaults to "pk".
|
||||
text_field (str): Name of the text field. Defaults to "text".
|
||||
vector_field (str): Name of the vector field. Defaults to "vector".
|
||||
|
||||
The connection args used for this class comes in the form of a dict,
|
||||
here are a few of the options:
|
||||
@ -107,6 +110,10 @@ class Milvus(VectorStore):
|
||||
index_params: Optional[dict] = None,
|
||||
search_params: Optional[dict] = None,
|
||||
drop_old: Optional[bool] = False,
|
||||
*,
|
||||
primary_field: str = "pk",
|
||||
text_field: str = "text",
|
||||
vector_field: str = "vector",
|
||||
):
|
||||
"""Initialize the Milvus vector store."""
|
||||
try:
|
||||
@ -138,11 +145,11 @@ class Milvus(VectorStore):
|
||||
self.consistency_level = consistency_level
|
||||
|
||||
# In order for a collection to be compatible, pk needs to be auto'id and int
|
||||
self._primary_field = "pk"
|
||||
# In order for compatiblility, the text field will need to be called "text"
|
||||
self._text_field = "text"
|
||||
self._primary_field = primary_field
|
||||
# In order for compatibility, the text field will need to be called "text"
|
||||
self._text_field = text_field
|
||||
# In order for compatibility, the vector field needs to be called "vector"
|
||||
self._vector_field = "vector"
|
||||
self._vector_field = vector_field
|
||||
self.fields: list[str] = []
|
||||
# Create the connection to the server
|
||||
if connection_args is None:
|
||||
|
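A hedged sketch of the new keyword-only field-name overrides; the connection details and field names are placeholders for an existing collection with a custom schema:

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Milvus

store = Milvus(
    embedding_function=OpenAIEmbeddings(),
    collection_name="my_docs",
    connection_args={"host": "localhost", "port": "19530"},
    primary_field="doc_pk",     # was hard-coded to "pk"
    text_field="body",          # was hard-coded to "text"
    vector_field="embedding",   # was hard-coded to "vector"
)
```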
5073
libs/langchain/poetry.lock
generated
File diff suppressed because it is too large
@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "langchain"
|
||||
version = "0.0.270"
|
||||
version = "0.0.271"
|
||||
description = "Building applications with LLMs through composability"
|
||||
authors = []
|
||||
license = "MIT"
|
||||
@ -125,6 +125,8 @@ newspaper3k = {version = "^0.2.8", optional = true}
|
||||
amazon-textract-caller = {version = "<2", optional = true}
|
||||
xata = {version = "^1.0.0a7", optional = true}
|
||||
xmltodict = {version = "^0.13.0", optional = true}
|
||||
google-api-core = {version = "^2.11.1", optional = true}
|
||||
markdownify = {version = "^0.11.6", optional = true}
|
||||
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
@ -337,6 +339,7 @@ extended_testing = [
|
||||
"xmltodict",
|
||||
"faiss-cpu",
|
||||
"openapi-schema-pydantic",
|
||||
"markdownify",
|
||||
]
|
||||
|
||||
[tool.ruff]
|
||||
|
@ -0,0 +1,175 @@
|
||||
import asyncio
|
||||
import os
|
||||
import time
|
||||
import urllib.request
|
||||
import uuid
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
from urllib.error import HTTPError
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.agents import AgentType, initialize_agent
|
||||
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
|
||||
from langchain.chat_models import ChatOpenAI
|
||||
from langchain.tools.ainetwork.utils import authenticate
|
||||
|
||||
|
||||
class Match(Enum):
|
||||
__test__ = False
|
||||
ListWildcard = 1
|
||||
StrWildcard = 2
|
||||
DictWildcard = 3
|
||||
IntWildcard = 4
|
||||
FloatWildcard = 5
|
||||
ObjectWildcard = 6
|
||||
|
||||
@classmethod
|
||||
def match(cls, value: Any, template: Any) -> bool:
|
||||
if template is cls.ListWildcard:
|
||||
return isinstance(value, list)
|
||||
elif template is cls.StrWildcard:
|
||||
return isinstance(value, str)
|
||||
elif template is cls.DictWildcard:
|
||||
return isinstance(value, dict)
|
||||
elif template is cls.IntWildcard:
|
||||
return isinstance(value, int)
|
||||
elif template is cls.FloatWildcard:
|
||||
return isinstance(value, float)
|
||||
elif template is cls.ObjectWildcard:
|
||||
return True
|
||||
elif type(value) != type(template):
|
||||
return False
|
||||
elif isinstance(value, dict):
|
||||
if len(value) != len(template):
|
||||
return False
|
||||
for k, v in value.items():
|
||||
if k not in template or not cls.match(v, template[k]):
|
||||
return False
|
||||
return True
|
||||
elif isinstance(value, list):
|
||||
if len(value) != len(template):
|
||||
return False
|
||||
for i in range(len(value)):
|
||||
if not cls.match(value[i], template[i]):
|
||||
return False
|
||||
return True
|
||||
else:
|
||||
return value == template
|
||||
|
||||
|
||||
@pytest.mark.requires("ain")
|
||||
def test_ainetwork_toolkit() -> None:
|
||||
def get(path: str, type: str = "value", default: Any = None) -> Any:
|
||||
ref = ain.db.ref(path)
|
||||
value = asyncio.run(
|
||||
{
|
||||
"value": ref.getValue,
|
||||
"rule": ref.getRule,
|
||||
"owner": ref.getOwner,
|
||||
}[type]()
|
||||
)
|
||||
return default if value is None else value
|
||||
|
||||
def validate(path: str, template: Any, type: str = "value") -> bool:
|
||||
value = get(path, type)
|
||||
return Match.match(value, template)
|
||||
|
||||
if not os.environ.get("AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY", None):
|
||||
from ain.account import Account
|
||||
|
||||
account = Account.create()
|
||||
os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"] = account.private_key
|
||||
|
||||
interface = authenticate(network="testnet")
|
||||
toolkit = AINetworkToolkit(network="testnet", interface=interface)
|
||||
llm = ChatOpenAI(model="gpt-4", temperature=0)
|
||||
agent = initialize_agent(
|
||||
tools=toolkit.get_tools(),
|
||||
llm=llm,
|
||||
verbose=True,
|
||||
agent=AgentType.OPENAI_FUNCTIONS,
|
||||
)
|
||||
ain = interface
|
||||
self_address = ain.wallet.defaultAccount.address
|
||||
co_address = "0x6813Eb9362372EEF6200f3b1dbC3f819671cBA69"
|
||||
|
||||
# Test creating an app
|
||||
UUID = uuid.UUID(
|
||||
int=(int(time.time() * 1000) << 64) | (uuid.uuid4().int & ((1 << 64) - 1))
|
||||
)
|
||||
app_name = f"_langchain_test__{str(UUID).replace('-', '_')}"
|
||||
agent.run(f"""Create app {app_name}""")
|
||||
validate(f"/manage_app/{app_name}/config", {"admin": {self_address: True}})
|
||||
validate(f"/apps/{app_name}/DB", None, "owner")
|
||||
|
||||
# Test reading owner config
|
||||
agent.run(f"""Read owner config of /apps/{app_name}/DB .""")
|
||||
assert ...
|
||||
|
||||
# Test granting owner config
|
||||
agent.run(
|
||||
f"""Grant owner authority to {co_address} for edit write rule permission of /apps/{app_name}/DB_co .""" # noqa: E501
|
||||
)
|
||||
validate(
|
||||
f"/apps/{app_name}/DB_co",
|
||||
{
|
||||
".owner": {
|
||||
"owners": {
|
||||
co_address: {
|
||||
"branch_owner": False,
|
||||
"write_function": False,
|
||||
"write_owner": False,
|
||||
"write_rule": True,
|
||||
}
|
||||
}
|
||||
}
|
||||
},
|
||||
"owner",
|
||||
)
|
||||
|
||||
# Test reading owner config
|
||||
agent.run(f"""Read owner config of /apps/{app_name}/DB_co .""")
|
||||
assert ...
|
||||
|
||||
# Test reading owner config
|
||||
agent.run(f"""Read owner config of /apps/{app_name}/DB .""")
|
||||
assert ... # Check if owner {self_address} exists
|
||||
|
||||
# Test reading a value
|
||||
agent.run(f"""Read value in /apps/{app_name}/DB""")
|
||||
assert ... # empty
|
||||
|
||||
# Test writing a value
|
||||
agent.run(f"""Write value {{1: 1904, 2: 43}} in /apps/{app_name}/DB""")
|
||||
validate(f"/apps/{app_name}/DB", {1: 1904, 2: 43})
|
||||
|
||||
# Test reading a value
|
||||
agent.run(f"""Read value in /apps/{app_name}/DB""")
|
||||
assert ... # check value
|
||||
|
||||
# Test reading a rule
|
||||
agent.run(f"""Read write rule of app {app_name} .""")
|
||||
assert ... # check rule that self_address exists
|
||||
|
||||
# Test sending AIN
|
||||
self_balance = get(f"/accounts/{self_address}/balance", default=0)
|
||||
transaction_history = get(f"/transfer/{self_address}/{co_address}", default={})
|
||||
if self_balance < 1:
|
||||
try:
|
||||
with urllib.request.urlopen(
|
||||
f"http://faucet.ainetwork.ai/api/test/{self_address}/"
|
||||
) as response:
|
||||
try_test = response.getcode()
|
||||
except HTTPError as e:
|
||||
try_test = e.getcode()
|
||||
else:
|
||||
try_test = 200
|
||||
|
||||
if try_test == 200:
|
||||
agent.run(f"""Send 1 AIN to {co_address}""")
|
||||
transaction_update = get(f"/transfer/{self_address}/{co_address}", default={})
|
||||
assert any(
|
||||
transaction_update[key]["value"] == 1
|
||||
for key in transaction_update.keys() - transaction_history.keys()
|
||||
)
|
@ -55,6 +55,21 @@ def test_connect_arangodb() -> None:
|
||||
assert ["hello_world"] == sample_aql_result
|
||||
|
||||
|
||||
def test_empty_schema_on_no_data() -> None:
|
||||
"""Test that the schema is empty for an empty ArangoDB Database"""
|
||||
db = get_arangodb_client()
|
||||
db.delete_graph("GameOfThrones", drop_collections=True, ignore_missing=True)
|
||||
db.delete_collection("empty_collection", ignore_missing=True)
|
||||
db.create_collection("empty_collection")
|
||||
|
||||
graph = ArangoGraph(db)
|
||||
|
||||
assert graph.schema == {
|
||||
"Graph Schema": [],
|
||||
"Collection Schema": [],
|
||||
}
|
||||
|
||||
|
||||
def test_aql_generation() -> None:
|
||||
"""Test that AQL statement is correctly generated and executed."""
|
||||
db = get_arangodb_client()
|
||||
|
@ -1,6 +1,8 @@
|
||||
"""Test ChatOpenAI wrapper."""
|
||||
|
||||
|
||||
from typing import Any
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.callbacks.manager import CallbackManager
|
||||
@ -89,6 +91,34 @@ def test_chat_openai_streaming() -> None:
|
||||
assert isinstance(response, BaseMessage)
|
||||
|
||||
|
||||
@pytest.mark.scheduled
|
||||
def test_chat_openai_streaming_generation_info() -> None:
|
||||
"""Test that generation info is preserved when streaming."""
|
||||
|
||||
class _FakeCallback(FakeCallbackHandler):
|
||||
saved_things: dict = {}
|
||||
|
||||
def on_llm_end(
|
||||
self,
|
||||
*args: Any,
|
||||
**kwargs: Any,
|
||||
) -> Any:
|
||||
# Save the generation
|
||||
self.saved_things["generation"] = args[0]
|
||||
|
||||
callback = _FakeCallback()
|
||||
callback_manager = CallbackManager([callback])
|
||||
chat = ChatOpenAI(
|
||||
max_tokens=2,
|
||||
temperature=0,
|
||||
callback_manager=callback_manager,
|
||||
)
|
||||
list(chat.stream("hi"))
|
||||
generation = callback.saved_things["generation"]
|
||||
# `Hello!` is two tokens, assert that that is what is returned
|
||||
assert generation.generations[0][0].text == "Hello!"
|
||||
|
||||
|
||||
def test_chat_openai_llm_output_contains_model_name() -> None:
|
||||
"""Test llm_output contains model_name."""
|
||||
chat = ChatOpenAI(max_tokens=10)
|
||||
|
@ -0,0 +1,48 @@
|
||||
import polars as pl
|
||||
import pytest
|
||||
|
||||
from langchain.document_loaders import PolarsDataFrameLoader
|
||||
from langchain.schema import Document
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def sample_data_frame() -> pl.DataFrame:
|
||||
data = {
|
||||
"text": ["Hello", "World"],
|
||||
"author": ["Alice", "Bob"],
|
||||
"date": ["2022-01-01", "2022-01-02"],
|
||||
}
|
||||
return pl.DataFrame(data)
|
||||
|
||||
|
||||
def test_load_returns_list_of_documents(sample_data_frame: pl.DataFrame) -> None:
|
||||
loader = PolarsDataFrameLoader(sample_data_frame)
|
||||
docs = loader.load()
|
||||
assert isinstance(docs, list)
|
||||
assert all(isinstance(doc, Document) for doc in docs)
|
||||
assert len(docs) == 2
|
||||
|
||||
|
||||
def test_load_converts_dataframe_columns_to_document_metadata(
|
||||
sample_data_frame: pl.DataFrame,
|
||||
) -> None:
|
||||
loader = PolarsDataFrameLoader(sample_data_frame)
|
||||
docs = loader.load()
|
||||
|
||||
for i, doc in enumerate(docs):
|
||||
df: pl.DataFrame = sample_data_frame[i]
|
||||
assert df is not None
|
||||
assert doc.metadata["author"] == df.select("author").item()
|
||||
assert doc.metadata["date"] == df.select("date").item()
|
||||
|
||||
|
||||
def test_load_uses_page_content_column_to_create_document_text(
|
||||
sample_data_frame: pl.DataFrame,
|
||||
) -> None:
|
||||
sample_data_frame = sample_data_frame.rename(mapping={"text": "dummy_test_column"})
|
||||
loader = PolarsDataFrameLoader(
|
||||
sample_data_frame, page_content_column="dummy_test_column"
|
||||
)
|
||||
docs = loader.load()
|
||||
assert docs[0].page_content == "Hello"
|
||||
assert docs[1].page_content == "World"
|
@ -11,12 +11,15 @@ PROJECT_ID - set to your Google Cloud project ID
|
||||
SEARCH_ENGINE_ID - the ID of the search engine to use for the test
|
||||
"""
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.retrievers.google_cloud_enterprise_search import (
|
||||
GoogleCloudEnterpriseSearchRetriever,
|
||||
)
|
||||
from langchain.schema import Document
|
||||
|
||||
|
||||
@pytest.mark.requires("google_api_core")
|
||||
def test_google_cloud_enterprise_search_get_relevant_documents() -> None:
|
||||
"""Test the get_relevant_documents() method."""
|
||||
retriever = GoogleCloudEnterpriseSearchRetriever()
|
||||
|
@ -6,7 +6,7 @@ import pytest
|
||||
import requests
|
||||
|
||||
from langchain.docstore.document import Document
|
||||
from langchain.document_loaders.confluence import ConfluenceLoader
|
||||
from langchain.document_loaders.confluence import ConfluenceLoader, ContentFormat
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@ -152,6 +152,40 @@ class TestConfluenceLoader:
|
||||
assert mock_confluence.cql.call_count == 0
|
||||
assert mock_confluence.get_page_child_by_type.call_count == 0
|
||||
|
||||
def test_confluence_loader_when_content_format_and_keep_markdown_format_enabled(
|
||||
self, mock_confluence: MagicMock
|
||||
) -> None:
|
||||
# one response with two pages
|
||||
mock_confluence.get_all_pages_from_space.return_value = [
|
||||
self._get_mock_page("123", ContentFormat.VIEW),
|
||||
self._get_mock_page("456", ContentFormat.VIEW),
|
||||
]
|
||||
mock_confluence.get_all_restrictions_for_content.side_effect = [
|
||||
self._get_mock_page_restrictions("123"),
|
||||
self._get_mock_page_restrictions("456"),
|
||||
]
|
||||
|
||||
confluence_loader = self._get_mock_confluence_loader(mock_confluence)
|
||||
|
||||
documents = confluence_loader.load(
|
||||
space_key=self.MOCK_SPACE_KEY,
|
||||
content_format=ContentFormat.VIEW,
|
||||
keep_markdown_format=True,
|
||||
max_pages=2,
|
||||
)
|
||||
|
||||
assert mock_confluence.get_all_pages_from_space.call_count == 1
|
||||
|
||||
assert len(documents) == 2
|
||||
assert all(isinstance(doc, Document) for doc in documents)
|
||||
assert documents[0].page_content == "Content 123\n\n"
|
||||
assert documents[1].page_content == "Content 456\n\n"
|
||||
|
||||
assert mock_confluence.get_page_by_id.call_count == 0
|
||||
assert mock_confluence.get_all_pages_by_label.call_count == 0
|
||||
assert mock_confluence.cql.call_count == 0
|
||||
assert mock_confluence.get_page_child_by_type.call_count == 0
|
||||
|
||||
def _get_mock_confluence_loader(
|
||||
self, mock_confluence: MagicMock
|
||||
) -> ConfluenceLoader:
|
||||
@ -163,11 +197,15 @@ class TestConfluenceLoader:
|
||||
confluence_loader.confluence = mock_confluence
|
||||
return confluence_loader
|
||||
|
||||
def _get_mock_page(self, page_id: str) -> Dict:
|
||||
def _get_mock_page(
|
||||
self, page_id: str, content_format: ContentFormat = ContentFormat.STORAGE
|
||||
) -> Dict:
|
||||
return {
|
||||
"id": f"{page_id}",
|
||||
"title": f"Page {page_id}",
|
||||
"body": {"storage": {"value": f"<p>Content {page_id}</p>"}},
|
||||
"body": {
|
||||
f"{content_format.name.lower()}": {"value": f"<p>Content {page_id}</p>"}
|
||||
},
|
||||
"status": "current",
|
||||
"type": "page",
|
||||
"_links": {
|
||||
|
@ -23,7 +23,7 @@ def init_repo(tmpdir: py.path.local, dir_name: str) -> str:
|
||||
git.add([sample_file])
|
||||
git.commit(m="Initial commit")
|
||||
|
||||
return repo_dir
|
||||
return str(repo_dir)
|
||||
|
||||
|
||||
@pytest.mark.requires("git")
|
||||
|
@ -2,6 +2,11 @@
|
||||
from langchain.tools import __all__ as public_api
|
||||
|
||||
_EXPECTED = [
|
||||
"AINAppOps",
|
||||
"AINOwnerOps",
|
||||
"AINRuleOps",
|
||||
"AINTransfer",
|
||||
"AINValueOps",
|
||||
"AIPluginTool",
|
||||
"APIOperation",
|
||||
"ArxivQueryRun",
|
||||
|
0
libs/langchain/tests/unit_tests/utils/__init__.py
Normal file
0
libs/langchain/tests/unit_tests/utils/__init__.py
Normal file
21
libs/langchain/tests/unit_tests/utils/test_iter.py
Normal file
21
libs/langchain/tests/unit_tests/utils/test_iter.py
Normal file
@ -0,0 +1,21 @@
|
||||
from typing import List
|
||||
|
||||
import pytest
|
||||
|
||||
from langchain.utils.iter import batch_iterate
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"input_size, input_iterable, expected_output",
|
||||
[
|
||||
(2, [1, 2, 3, 4, 5], [[1, 2], [3, 4], [5]]),
|
||||
(3, [10, 20, 30, 40, 50], [[10, 20, 30], [40, 50]]),
|
||||
(1, [100, 200, 300], [[100], [200], [300]]),
|
||||
(4, [], []),
|
||||
],
|
||||
)
|
||||
def test_batch_iterate(
|
||||
input_size: int, input_iterable: List[str], expected_output: List[str]
|
||||
) -> None:
|
||||
"""Test batching function."""
|
||||
assert list(batch_iterate(input_size, input_iterable)) == expected_output
|
758
poetry.lock
generated
File diff suppressed because it is too large