Merge branch 'master' into master

Julien Salinas 2023-08-23 14:42:40 +02:00 committed by GitHub
commit f1072cc31f
90 changed files with 5790 additions and 3810 deletions

View File

@ -80,10 +80,10 @@ For example, to contribute to `langchain` run `cd libs/langchain` before getting
To install requirements:
```bash
poetry install -E all
poetry install --with test
```
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage. Note the `-E all` flag will install all optional dependencies necessary for integration testing.
This will install all requirements for running the package, examples, linting, formatting, tests, and coverage.
❗Note: If during installation you receive a `WheelFileValidationError` for `debugpy`, please make sure you are running Poetry v1.5.1. This bug was present in older versions of Poetry (e.g. 1.4.1) and has been resolved in newer releases. If you are still seeing this bug on v1.5.1, you may also try disabling "modern installation" (`poetry config installer.modern-installation false`) and re-installing requirements. See [this `debugpy` issue](https://github.com/microsoft/debugpy/issues/1246) for more details.

View File

@ -15,19 +15,13 @@ inputs:
description: Poetry version
required: true
install-command:
description: Command run for installing dependencies
required: false
default: poetry install
cache-key:
description: Cache key to use for manual handling of caching
required: true
working-directory:
description: Directory to run install-command in
required: false
default: ""
description: Directory whose poetry.lock file should be cached
required: true
runs:
using: composite
@ -38,47 +32,35 @@ runs:
python-version: ${{ inputs.python-version }}
- uses: actions/cache@v3
id: cache-pip
name: Cache Pip ${{ inputs.python-version }}
id: cache-bin-poetry
name: Cache Poetry binary - Python ${{ inputs.python-version }}
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
with:
path: |
~/.cache/pip
key: pip-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}
/opt/pipx/venvs/poetry
/opt/pipx_bin/poetry
# This step caches the poetry installation, so make sure it's keyed on the poetry version as well.
key: bin-poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-${{ inputs.poetry-version }}
- name: Install poetry
if: steps.cache-bin-poetry.outputs.cache-hit != 'true'
shell: bash
env:
POETRY_VERSION: ${{ inputs.poetry-version }}
PYTHON_VERSION: ${{ inputs.python-version }}
run: pipx install "poetry==$POETRY_VERSION" --python "python$PYTHON_VERSION" --verbose
- name: Check Poetry File
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
poetry check
- name: Check lock file
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
poetry lock --check
- uses: actions/cache@v3
id: cache-poetry
- name: Restore pip and poetry cached dependencies
uses: actions/cache@v3
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "4"
WORKDIR: ${{ inputs.working-directory == '' && '.' || inputs.working-directory }}
with:
path: |
~/.cache/pip
~/.cache/pypoetry/virtualenvs
~/.cache/pypoetry/cache
~/.cache/pypoetry/artifacts
${{ env.WORKDIR }}/.venv
key: poetry-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
- run: ${{ inputs.install-command }}
working-directory: ${{ inputs.working-directory }}
shell: bash
key: py-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ inputs.python-version }}-poetry-${{ inputs.poetry-version }}-${{ inputs.cache-key }}-${{ hashFiles(format('{0}/**/poetry.lock', env.WORKDIR)) }}

View File

@ -80,31 +80,32 @@ jobs:
find "$WORKDIR" -name '*.py' -type f -not -newermt "$OLDEST_COMMIT_TIME" -exec touch -c -m -t '200001010000' '{}' '+'
echo "oldest-commit=$OLDEST_COMMIT" >> "$GITHUB_OUTPUT"
- uses: actions/cache@v3
id: cache-pip
name: Cache langchain editable pip install - ${{ matrix.python-version }}
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
with:
path: |
~/.cache/pip
key: pip-editable-langchain-deps-${{ runner.os }}-${{ runner.arch }}-py-${{ matrix.python-version }}
- name: Install poetry
run: |
pipx install "poetry==$POETRY_VERSION"
- name: Set up Python ${{ matrix.python-version }}
uses: actions/setup-python@v4
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
cache: poetry
cache-dependency-path: |
${{ env.WORKDIR }}/**/poetry.lock
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: lint
- name: Check Poetry File
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
poetry check
- name: Check lock file
shell: bash
working-directory: ${{ inputs.working-directory }}
run: |
poetry lock --check
- name: Install dependencies
working-directory: ${{ inputs.working-directory }}
run: |
poetry install
- name: Install langchain editable
working-directory: ${{ inputs.working-directory }}
if: ${{ inputs.working-directory != 'libs/langchain' }}
@ -115,7 +116,7 @@ jobs:
uses: actions/cache@v3
env:
CACHE_BASE: black-${{ runner.os }}-${{ runner.arch }}-py${{ matrix.python-version }}-${{ inputs.working-directory }}-${{ hashFiles(format('{0}/poetry.lock', env.WORKDIR)) }}
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "1"
with:
path: |
${{ env.WORKDIR }}/.black_cache
@ -127,7 +128,7 @@ jobs:
- name: Get .mypy_cache to speed up mypy
uses: actions/cache@v3
env:
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "15"
SEGMENT_DOWNLOAD_TIMEOUT_MIN: "2"
with:
path: |
${{ env.WORKDIR }}/.mypy_cache

View File

@ -0,0 +1,81 @@
name: pydantic v1/v2 compatibility
on:
workflow_call:
inputs:
working-directory:
required: true
type: string
description: "From which folder this pipeline executes"
env:
POETRY_VERSION: "1.5.1"
jobs:
build:
defaults:
run:
working-directory: ${{ inputs.working-directory }}
runs-on: ubuntu-latest
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Pydantic v1/v2 compatibility - Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ inputs.working-directory }}
cache-key: pydantic-cross-compat
- name: Install dependencies
shell: bash
run: poetry install
- name: Install the opposite major version of pydantic
# If normal tests use pydantic v1, here we'll use v2, and vice versa.
shell: bash
run: |
# Determine the major part of pydantic version
REGULAR_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
if [[ "$REGULAR_VERSION" == "1" ]]; then
PYDANTIC_DEP=">=2.1,<3"
TEST_WITH_VERSION="2"
elif [[ "$REGULAR_VERSION" == "2" ]]; then
PYDANTIC_DEP="<2"
TEST_WITH_VERSION="1"
else
echo "Unexpected pydantic major version '$REGULAR_VERSION', cannot determine which version to use for cross-compatibility test."
exit 1
fi
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
# which would prevent caching from working: the cache would get saved
# to a different key than where it gets loaded from.
poetry run pip install "pydantic${PYDANTIC_DEP}"
# Ensure that the correct pydantic is installed now.
echo "Checking pydantic version... Expecting ${TEST_WITH_VERSION}"
# Determine the major part of pydantic version
CURRENT_VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
# Check that the major part of pydantic version is as expected, if not
# raise an error
if [[ "$CURRENT_VERSION" != "$TEST_WITH_VERSION" ]]; then
echo "Error: expected pydantic version ${CURRENT_VERSION} to have been installed, but found: ${TEST_WITH_VERSION}"
exit 1
fi
echo "Found pydantic version ${CURRENT_VERSION}, as expected"
- name: Run pydantic compatibility tests
shell: bash
run: make test

View File

@ -23,6 +23,9 @@ jobs:
# Trusted publishing has to also be configured on PyPI for each package:
# https://docs.pypi.org/trusted-publishers/adding-a-publisher/
id-token: write
# This permission is needed by `ncipollo/release-action` to create the GitHub release.
contents: write
defaults:
run:
working-directory: ${{ inputs.working-directory }}

View File

@ -7,10 +7,6 @@ on:
required: true
type: string
description: "From which folder this pipeline executes"
test_type:
type: string
description: "Test types to run"
default: '["core", "extended", "core-pydantic-2"]'
env:
POETRY_VERSION: "1.5.1"
@ -28,61 +24,22 @@ jobs:
- "3.9"
- "3.10"
- "3.11"
test_type: ${{ fromJSON(inputs.test_type) }}
name: Python ${{ matrix.python-version }} ${{ matrix.test_type }}
name: Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
working-directory: ${{ inputs.working-directory }}
poetry-version: ${{ env.POETRY_VERSION }}
cache-key: ${{ matrix.test_type }}
install-command: |
if [ "${{ matrix.test_type }}" == "core" ]; then
echo "Running core tests, installing dependencies with poetry..."
poetry install
elif [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
echo "Running core-pydantic-v2 tests, installing dependencies with poetry..."
poetry install
working-directory: ${{ inputs.working-directory }}
cache-key: core
# Install via `pip` instead of `poetry add` to avoid changing lockfile,
# which would prevent caching from working: the cache would get saved
# to a different key than where it gets loaded from.
poetry run pip install 'pydantic>=2.1,<3'
else
echo "Running extended tests, installing dependencies with poetry..."
poetry install -E extended_testing
fi
- name: Verify pydantic version
run: |
if [ "${{ matrix.test_type }}" == "core-pydantic-2" ]; then
EXPECTED_VERSION=2
else
EXPECTED_VERSION=1
fi
echo "Checking pydantic version... Expecting ${EXPECTED_VERSION}"
# Determine the major part of pydantic version
VERSION=$(poetry run python -c "import pydantic; print(pydantic.__version__)" | cut -d. -f1)
# Check that the major part of pydantic version is as expected, if not
# raise an error
if [[ "$VERSION" -ne $EXPECTED_VERSION ]]; then
echo "Error: pydantic version must be equal to ${EXPECTED_VERSION}; Found: ${VERSION}"
exit 1
fi
echo "Found pydantic version ${VERSION}, as expected"
- name: Install dependencies
shell: bash
- name: Run ${{matrix.test_type}} tests
run: |
case "${{ matrix.test_type }}" in
core | core-pydantic-2)
make test
;;
*)
make extended_tests
;;
esac
run: poetry install
- name: Run core tests
shell: bash
run: make test

View File

@ -8,10 +8,25 @@ on:
paths:
- '.github/workflows/_lint.yml'
- '.github/workflows/_test.yml'
- '.github/workflows/_pydantic_compatibility.yml'
- '.github/workflows/langchain_ci.yml'
- 'libs/langchain/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
POETRY_VERSION: "1.5.1"
WORKDIR: "libs/langchain"
jobs:
lint:
uses:
@ -19,10 +34,50 @@ jobs:
with:
working-directory: libs/langchain
secrets: inherit
test:
uses:
./.github/workflows/_test.yml
with:
working-directory: libs/langchain
test_type: '["core", "extended", "core-pydantic-2"]'
secrets: inherit
secrets: inherit
pydantic-compatibility:
uses:
./.github/workflows/_pydantic_compatibility.yml
with:
working-directory: libs/langchain
secrets: inherit
extended-tests:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ${{ env.WORKDIR }}
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: Python ${{ matrix.python-version }} extended tests
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: libs/langchain
cache-key: extended
- name: Install dependencies
shell: bash
run: |
echo "Running extended tests, installing dependencies with poetry..."
poetry install -E extended_testing
- name: Run extended tests
run: make extended_tests

View File

@ -13,6 +13,20 @@ on:
- 'libs/experimental/**'
workflow_dispatch: # Allows to trigger the workflow manually in GitHub UI
# If another push to the same PR or branch happens while this workflow is still running,
# cancel the earlier run in favor of the next run.
#
# There's no point in testing an outdated version of the code. GitHub only allows
# a limited number of job runners to be active at the same time, so it's better to cancel
# pointless jobs early so that more useful jobs can run sooner.
concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
env:
POETRY_VERSION: "1.5.1"
WORKDIR: "libs/experimental"
jobs:
lint:
uses:
@ -20,10 +34,50 @@ jobs:
with:
working-directory: libs/experimental
secrets: inherit
test:
uses:
./.github/workflows/_test.yml
with:
working-directory: libs/experimental
test_type: '["core"]'
secrets: inherit
secrets: inherit
# It's possible that langchain-experimental works fine with the latest *published* langchain,
# but is broken with the langchain on `master`.
#
# We want to catch situations like that *before* releasing a new langchain, hence this test.
test-with-latest-langchain:
runs-on: ubuntu-latest
defaults:
run:
working-directory: ${{ env.WORKDIR }}
strategy:
matrix:
python-version:
- "3.8"
- "3.9"
- "3.10"
- "3.11"
name: test with unpublished langchain - Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }} + Poetry ${{ env.POETRY_VERSION }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: ${{ env.WORKDIR }}
cache-key: unpublished-langchain
- name: Install dependencies
shell: bash
run: |
echo "Running tests with unpublished langchain, installing dependencies with poetry..."
poetry install
echo "Editably installing langchain outside of poetry, to avoid messing up lockfile..."
poetry run pip install -e ../langchain
- name: Run tests
run: make test

View File

@ -25,18 +25,25 @@ jobs:
name: Python ${{ matrix.python-version }}
steps:
- uses: actions/checkout@v3
- name: Set up Python ${{ matrix.python-version }}
uses: "./.github/actions/poetry_setup"
with:
python-version: ${{ matrix.python-version }}
poetry-version: ${{ env.POETRY_VERSION }}
working-directory: libs/langchain
install-command: |
echo "Running scheduled tests, installing dependencies with poetry..."
poetry install --with=test_integration
cache-key: scheduled
- name: Install dependencies
working-directory: libs/langchain
shell: bash
run: |
echo "Running scheduled tests, installing dependencies with poetry..."
poetry install --with=test_integration
- name: Run tests
shell: bash
env:
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
run: |
make scheduled_tests
shell: bash

View File

@ -2071,8 +2071,8 @@
"PromptLayer": "https://python.langchain.com/docs/integrations/providers/promptlayer",
"PromptLayer OpenAI": "https://python.langchain.com/docs/integrations/llms/promptlayer_openai"
},
"DeepLake": {
"Deep Lake": "https://python.langchain.com/docs/integrations/providers/deeplake",
"Activeloop DeepLake": {
"Deep Lake": "https://python.langchain.com/docs/integrations/providers/activeloop_deeplake",
"Activeloop's Deep Lake": "https://python.langchain.com/docs/integrations/vectorstores/activeloop_deeplake",
"Analysis of Twitter the-algorithm source code with LangChain, GPT4 and Activeloop's Deep Lake": "https://python.langchain.com/docs/use_cases/question_answering/how_to/code/twitter-the-algorithm-analysis-deeplake",
"Use LangChain, GPT and Activeloop's Deep Lake to work with code base": "https://python.langchain.com/docs/use_cases/question_answering/how_to/code/code-analysis-deeplake",

View File

@ -166,7 +166,7 @@
},
{
"source": "/docs/integrations/deeplake",
"destination": "/docs/integrations/providers/deeplake"
"destination": "/docs/integrations/providers/activeloop_deeplake"
},
{
"source": "/docs/integrations/diffbot",

View File

@ -79,3 +79,7 @@ See OpenLLM's [integration doc](https://github.com/bentoml/OpenLLM#%EF%B8%8F-int
## [Databutton](https://databutton.com/home?new-data-app=true)
These templates serve as examples of how to build, deploy, and share LangChain applications using Databutton. You can create user interfaces with Streamlit, automate tasks by scheduling Python code, and store files and data in the built-in store. Examples include a Chatbot interface with conversational memory, a Personal search engine, and a starter template for LangChain apps. Deploying and sharing is just one click away.
## [AzureML Online Endpoint](https://github.com/Azure/azureml-examples/blob/main/sdk/python/endpoints/online/llm/langchain/1_langchain_basic_deploy.ipynb)
A minimal example of how to deploy LangChain to an Azure Machine Learning Online Endpoint.

View File

@ -143,12 +143,39 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c095285d",
"cell_type": "markdown",
"id": "57e27714",
"metadata": {},
"outputs": [],
"source": []
"source": [
"## Fine-tuning\n",
"\n",
"You can call fine-tuned OpenAI models by passing in your corresponding `modelName` parameter.\n",
"\n",
"This generally takes the form of `ft:{OPENAI_MODEL_NAME}:{ORG_NAME}::{MODEL_ID}`. For example:"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "33c4a8b0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"AIMessage(content=\"J'adore la programmation.\", additional_kwargs={}, example=False)"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fine_tuned_model = ChatOpenAI(temperature=0, model_name=\"ft:gpt-3.5-turbo-0613:langchain::7qTVM5AR\")\n",
"\n",
"fine_tuned_model(messages)"
]
}
],
"metadata": {
@ -167,7 +194,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
"version": "3.10.5"
}
},
"nbformat": 4,

View File

@ -0,0 +1,225 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "213a38a2",
"metadata": {},
"source": [
"# Polars DataFrame\n",
"\n",
"This notebook goes over how to load data from a [polars](https://pola-rs.github.io/polars-book/user-guide/) DataFrame."
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "f6a7a9e4-80d6-486a-b2e3-636c568aa97c",
"metadata": {},
"outputs": [],
"source": [
"#!pip install polars"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "79331964",
"metadata": {},
"outputs": [],
"source": [
"import polars as pl"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "e487044c",
"metadata": {},
"outputs": [],
"source": [
"df = pl.read_csv(\"example_data/mlb_teams_2012.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "ac273ca1",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div><style>\n",
".dataframe > thead > tr > th,\n",
".dataframe > tbody > tr > td {\n",
" text-align: right;\n",
"}\n",
"</style>\n",
"<small>shape: (5, 3)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>Team</th><th> &quot;Payroll (millions)&quot;</th><th> &quot;Wins&quot;</th></tr><tr><td>str</td><td>f64</td><td>i64</td></tr></thead><tbody><tr><td>&quot;Nationals&quot;</td><td>81.34</td><td>98</td></tr><tr><td>&quot;Reds&quot;</td><td>82.2</td><td>97</td></tr><tr><td>&quot;Yankees&quot;</td><td>197.96</td><td>95</td></tr><tr><td>&quot;Giants&quot;</td><td>117.62</td><td>94</td></tr><tr><td>&quot;Braves&quot;</td><td>83.31</td><td>94</td></tr></tbody></table></div>"
],
"text/plain": [
"shape: (5, 3)\n",
"┌───────────┬───────────────────────┬─────────┐\n",
"│ Team ┆ \"Payroll (millions)\" ┆ \"Wins\" │\n",
"│ --- ┆ --- ┆ --- │\n",
"│ str ┆ f64 ┆ i64 │\n",
"╞═══════════╪═══════════════════════╪═════════╡\n",
"│ Nationals ┆ 81.34 ┆ 98 │\n",
"│ Reds ┆ 82.2 ┆ 97 │\n",
"│ Yankees ┆ 197.96 ┆ 95 │\n",
"│ Giants ┆ 117.62 ┆ 94 │\n",
"│ Braves ┆ 83.31 ┆ 94 │\n",
"└───────────┴───────────────────────┴─────────┘"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "66e47a13",
"metadata": {},
"outputs": [],
"source": [
"from langchain.document_loaders import PolarsDataFrameLoader"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "2334caca",
"metadata": {},
"outputs": [],
"source": [
"loader = PolarsDataFrameLoader(df, page_content_column=\"Team\")"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "d616c2b0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[Document(page_content='Nationals', metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}),\n",
" Document(page_content='Reds', metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}),\n",
" Document(page_content='Yankees', metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}),\n",
" Document(page_content='Giants', metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}),\n",
" Document(page_content='Braves', metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}),\n",
" Document(page_content='Athletics', metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}),\n",
" Document(page_content='Rangers', metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}),\n",
" Document(page_content='Orioles', metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}),\n",
" Document(page_content='Rays', metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}),\n",
" Document(page_content='Angels', metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}),\n",
" Document(page_content='Tigers', metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}),\n",
" Document(page_content='Cardinals', metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}),\n",
" Document(page_content='Dodgers', metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}),\n",
" Document(page_content='White Sox', metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}),\n",
" Document(page_content='Brewers', metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}),\n",
" Document(page_content='Phillies', metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}),\n",
" Document(page_content='Diamondbacks', metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}),\n",
" Document(page_content='Pirates', metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}),\n",
" Document(page_content='Padres', metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}),\n",
" Document(page_content='Mariners', metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}),\n",
" Document(page_content='Mets', metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}),\n",
" Document(page_content='Blue Jays', metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}),\n",
" Document(page_content='Royals', metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}),\n",
" Document(page_content='Marlins', metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}),\n",
" Document(page_content='Red Sox', metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}),\n",
" Document(page_content='Indians', metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}),\n",
" Document(page_content='Twins', metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}),\n",
" Document(page_content='Rockies', metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}),\n",
" Document(page_content='Cubs', metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}),\n",
" Document(page_content='Astros', metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55})]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"loader.load()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "beb55c2f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"page_content='Nationals' metadata={' \"Payroll (millions)\"': 81.34, ' \"Wins\"': 98}\n",
"page_content='Reds' metadata={' \"Payroll (millions)\"': 82.2, ' \"Wins\"': 97}\n",
"page_content='Yankees' metadata={' \"Payroll (millions)\"': 197.96, ' \"Wins\"': 95}\n",
"page_content='Giants' metadata={' \"Payroll (millions)\"': 117.62, ' \"Wins\"': 94}\n",
"page_content='Braves' metadata={' \"Payroll (millions)\"': 83.31, ' \"Wins\"': 94}\n",
"page_content='Athletics' metadata={' \"Payroll (millions)\"': 55.37, ' \"Wins\"': 94}\n",
"page_content='Rangers' metadata={' \"Payroll (millions)\"': 120.51, ' \"Wins\"': 93}\n",
"page_content='Orioles' metadata={' \"Payroll (millions)\"': 81.43, ' \"Wins\"': 93}\n",
"page_content='Rays' metadata={' \"Payroll (millions)\"': 64.17, ' \"Wins\"': 90}\n",
"page_content='Angels' metadata={' \"Payroll (millions)\"': 154.49, ' \"Wins\"': 89}\n",
"page_content='Tigers' metadata={' \"Payroll (millions)\"': 132.3, ' \"Wins\"': 88}\n",
"page_content='Cardinals' metadata={' \"Payroll (millions)\"': 110.3, ' \"Wins\"': 88}\n",
"page_content='Dodgers' metadata={' \"Payroll (millions)\"': 95.14, ' \"Wins\"': 86}\n",
"page_content='White Sox' metadata={' \"Payroll (millions)\"': 96.92, ' \"Wins\"': 85}\n",
"page_content='Brewers' metadata={' \"Payroll (millions)\"': 97.65, ' \"Wins\"': 83}\n",
"page_content='Phillies' metadata={' \"Payroll (millions)\"': 174.54, ' \"Wins\"': 81}\n",
"page_content='Diamondbacks' metadata={' \"Payroll (millions)\"': 74.28, ' \"Wins\"': 81}\n",
"page_content='Pirates' metadata={' \"Payroll (millions)\"': 63.43, ' \"Wins\"': 79}\n",
"page_content='Padres' metadata={' \"Payroll (millions)\"': 55.24, ' \"Wins\"': 76}\n",
"page_content='Mariners' metadata={' \"Payroll (millions)\"': 81.97, ' \"Wins\"': 75}\n",
"page_content='Mets' metadata={' \"Payroll (millions)\"': 93.35, ' \"Wins\"': 74}\n",
"page_content='Blue Jays' metadata={' \"Payroll (millions)\"': 75.48, ' \"Wins\"': 73}\n",
"page_content='Royals' metadata={' \"Payroll (millions)\"': 60.91, ' \"Wins\"': 72}\n",
"page_content='Marlins' metadata={' \"Payroll (millions)\"': 118.07, ' \"Wins\"': 69}\n",
"page_content='Red Sox' metadata={' \"Payroll (millions)\"': 173.18, ' \"Wins\"': 69}\n",
"page_content='Indians' metadata={' \"Payroll (millions)\"': 78.43, ' \"Wins\"': 68}\n",
"page_content='Twins' metadata={' \"Payroll (millions)\"': 94.08, ' \"Wins\"': 66}\n",
"page_content='Rockies' metadata={' \"Payroll (millions)\"': 78.06, ' \"Wins\"': 64}\n",
"page_content='Cubs' metadata={' \"Payroll (millions)\"': 88.19, ' \"Wins\"': 61}\n",
"page_content='Astros' metadata={' \"Payroll (millions)\"': 60.65, ' \"Wins\"': 55}\n"
]
}
],
"source": [
"# Use lazy load for larger table, which won't read the full table into memory\n",
"for i in loader.lazy_load():\n",
" print(i)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -1,4 +1,4 @@
# Deep Lake
# Activeloop Deep Lake
This page covers how to use the Deep Lake ecosystem within LangChain.
## Why Deep Lake?
@ -6,9 +6,15 @@ This page covers how to use the Deep Lake ecosystem within LangChain.
- Not only stores embeddings, but also the original data with automatic version control.
- Truly serverless. Doesn't require another service and can be used with major cloud providers (AWS S3, GCS, etc.)
Activeloop Deep Lake supports SelfQuery Retrieval:
[Activeloop Deep Lake Self Query Retrieval](/docs/extras/modules/data_connection/retrievers/self_query/activeloop_deeplake_self_query)
## More Resources
1. [Ultimate Guide to LangChain & Deep Lake: Build ChatGPT to Answer Questions on Your Financial Data](https://www.activeloop.ai/resources/ultimate-guide-to-lang-chain-deep-lake-build-chat-gpt-to-answer-questions-on-your-financial-data/)
2. [Twitter the-algorithm codebase analysis with Deep Lake](../use_cases/code/twitter-the-algorithm-analysis-deeplake.html)
2. [Twitter the-algorithm codebase analysis with Deep Lake](/docs/use_cases/question_answering/how_to/code/twitter-the-algorithm-analysis-deeplake)
3. [Code Understanding](/docs/modules/data_connection/retrievers/self_query/activeloop_deeplake_self_query)
4. Here are the [whitepaper](https://www.deeplake.ai/whitepaper) and [academic paper](https://arxiv.org/pdf/2209.10785.pdf) for Deep Lake
5. Here is a set of additional resources available for review: [Deep Lake](https://github.com/activeloopai/deeplake), [Get started](https://docs.activeloop.ai/getting-started), and [Tutorials](https://docs.activeloop.ai/hub-tutorials)
@ -27,4 +33,4 @@ from langchain.vectorstores import DeepLake
```
For a more detailed walkthrough of the Deep Lake wrapper, see [this notebook](/docs/integrations/vectorstores/deeplake.html)
For a more detailed walkthrough of the Deep Lake wrapper, see [this notebook](/docs/integrations/vectorstores/activeloop_deeplake)
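As a quick, hedged sketch (not taken from the notebook): a small local dataset can be built and queried along these lines, assuming an `OPENAI_API_KEY` for the embeddings; the `dataset_path` below is illustrative and keyword names may vary between releases.
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DeepLake

embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY

# Build a local Deep Lake dataset from a few texts; the path is a placeholder.
db = DeepLake.from_texts(
    ["Deep Lake stores embeddings together with the original data",
     "Deep Lake supports automatic version control"],
    embedding=embeddings,
    dataset_path="./my_deeplake",
)

print(db.similarity_search("What does Deep Lake store?", k=1))
```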

View File

@ -1,27 +1,19 @@
# AtlasDB
# Atlas
>[Nomic Atlas](https://docs.nomic.ai/index.html) is a platform for interacting with both
> small and internet scale unstructured datasets.
This page covers how to use Nomic's Atlas ecosystem within LangChain.
It is broken into two parts: installation and setup, and then references to specific Atlas wrappers.
## Installation and Setup
- Install the Python package with `pip install nomic`
- Nomic is also included in langchain's Poetry extras: `poetry install -E all`
## Wrappers
### VectorStore
There exists a wrapper around the Atlas neural database, allowing you to use it as a vectorstore.
This vectorstore also gives you full access to the underlying AtlasProject object, which will allow you to use the full range of Atlas map interactions, such as bulk tagging and automatic topic modeling.
Please see [the Atlas docs](https://docs.nomic.ai/atlas_api.html) for more detailed information.
- `Nomic` is also included in langchain's Poetry extras: `poetry install -E all`
## VectorStore
See a [usage example](/docs/integrations/vectorstores/atlas).
To import this vectorstore:
```python
from langchain.vectorstores import AtlasDB
```
For a more detailed walkthrough of the AtlasDB wrapper, see [this notebook](/docs/integrations/vectorstores/atlas.html)
```

View File

@ -0,0 +1,25 @@
# ClickHouse
> [ClickHouse](https://clickhouse.com/) is a fast and resource-efficient open-source database for real-time
> apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries.
> It has data structures and distance search functions (like `L2Distance`) as well as
> [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes)
> that enable ClickHouse to be used as a high-performance, scalable vector database to store and search vectors with SQL.
## Installation and Setup
We need to install the `clickhouse-connect` Python package.
```bash
pip install clickhouse-connect
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/clickhouse).
```python
from langchain.vectorstores import Clickhouse, ClickhouseSettings
```
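As a hedged sketch of the wrapper (the linked notebook is the authoritative walkthrough), a store can be populated roughly as follows; it assumes a reachable ClickHouse server and an `OPENAI_API_KEY`, and the table name is a placeholder.
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Clickhouse, ClickhouseSettings

embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY

# Assumes a ClickHouse server reachable with clickhouse-connect's defaults.
settings = ClickhouseSettings(table="langchain_demo")  # placeholder table name

docsearch = Clickhouse.from_texts(
    ["ClickHouse supports approximate nearest neighbor search indexes"],
    embeddings,
    config=settings,
)

print(docsearch.similarity_search("Which indexes does ClickHouse support?", k=1))
```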

View File

@ -0,0 +1,30 @@
# DocArray
> [DocArray](https://docarray.jina.ai/) is a library for nested, unstructured, multimodal data in transit,
> including text, image, audio, video, 3D mesh, etc. It allows deep-learning engineers to efficiently process,
> embed, search, recommend, store, and transfer multimodal data with a Pythonic API.
## Installation and Setup
We need to install the `docarray` Python package.
```bash
pip install docarray
```
## Vector Store
LangChain provides access to the `In-memory` and `HNSW` vector stores from the `DocArray` library.
See a [usage example](/docs/integrations/vectorstores/docarray_hnsw).
```python
from langchain.vectorstores import DocArrayHnswSearch
```
See a [usage example](/docs/integrations/vectorstores/docarray_in_memory).
```python
from langchain.vectorstores import DocArrayInMemorySearch
```
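For orientation, a minimal in-memory sketch (the HNSW variant additionally needs an on-disk working directory and the embedding dimensionality; see the linked notebooks for those details):
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import DocArrayInMemorySearch

embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY

# Everything is held in process memory; nothing is persisted to disk.
db = DocArrayInMemorySearch.from_texts(
    ["DocArray handles nested, multimodal data",
     "It ships an in-memory vector index"],
    embeddings,
)

print(db.similarity_search("What kind of data does DocArray handle?", k=1))
```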

View File

@ -0,0 +1,32 @@
# Facebook Faiss
>[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/)
> is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that
> search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting
> code for evaluation and parameter tuning.
[Faiss documentation](https://faiss.ai/).
## Installation and Setup
We need to install the `faiss` Python package.
```bash
pip install faiss-gpu # For CUDA 7.5+ supported GPUs.
```
OR
```bash
pip install faiss-cpu # For CPU Installation
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/faiss).
```python
from langchain.vectorstores import FAISS
```
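A minimal sketch of the wrapper, assuming an `OPENAI_API_KEY` for the embeddings; see the linked notebook for more, including saving and loading an index.
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY

# Build an in-memory FAISS index from a few texts.
db = FAISS.from_texts(
    ["Faiss searches dense vectors efficiently",
     "It scales to vector sets that do not fit in RAM"],
    embeddings,
)

docs = db.similarity_search("How does Faiss search vectors?", k=1)
print(docs[0].page_content)
```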

View File

@ -0,0 +1,25 @@
# Google Vertex AI MatchingEngine
> [Google Vertex AI Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides
> the industry's leading high-scale low latency vector database. These vector databases are commonly
> referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.
## Installation and Setup
We need to install several Python packages.
```bash
pip install tensorflow \
google-cloud-aiplatform \
tensorflow-hub \
tensorflow-text
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/matchingengine).
```python
from langchain.vectorstores import MatchingEngine
```
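Unlike the in-memory stores, this wrapper is constructed from pre-provisioned Matching Engine resources. A rough sketch is below; every identifier is a placeholder, and the argument names of `from_components` are assumptions to verify against the linked notebook.
```python
from langchain.embeddings import OpenAIEmbeddings  # any Embeddings implementation should work
from langchain.vectorstores import MatchingEngine

# All values below are placeholders for resources you must provision beforehand
# (a deployed index, an index endpoint, and a GCS bucket for staging embeddings).
vector_store = MatchingEngine.from_components(
    project_id="my-gcp-project",
    region="us-central1",
    gcs_bucket_name="my-embeddings-bucket",
    index_id="1234567890",
    endpoint_id="0987654321",
    embedding=OpenAIEmbeddings(),
)

vector_store.add_texts(["Matching Engine serves approximate nearest neighbor queries at scale"])
print(vector_store.similarity_search("ANN service", k=1))
```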

View File

@ -0,0 +1,30 @@
# Meilisearch
> [Meilisearch](https://meilisearch.com) is an open-source, lightning-fast, and hyper
> relevant search engine.
> It comes with great defaults to help developers build snappy search experiences.
>
> You can [self-host Meilisearch](https://www.meilisearch.com/docs/learn/getting_started/installation#local-installation)
> or run on [Meilisearch Cloud](https://www.meilisearch.com/pricing).
>
>`Meilisearch v1.3` supports vector search.
## Installation and Setup
See a [usage example](/docs/integrations/vectorstores/meilisearch) for detailed configuration instructions.
We need to install the `meilisearch` Python package.
```bash
pip install meilisearch
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/meilisearch).
```python
from langchain.vectorstores import Meilisearch
```
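A hedged sketch of the wrapper; it assumes a running Meilisearch instance with vector search enabled and an `OPENAI_API_KEY`, and the environment variable names follow the linked notebook (treat them as assumptions).
```python
import os

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Meilisearch

# Placeholder connection details for a local Meilisearch instance.
os.environ["MEILI_HTTP_ADDR"] = "http://127.0.0.1:7700"
os.environ["MEILI_MASTER_KEY"] = "***"

db = Meilisearch.from_texts(
    ["Meilisearch v1.3 adds vector search"],
    OpenAIEmbeddings(),  # requires OPENAI_API_KEY
)

print(db.similarity_search("vector search support", k=1))
```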

View File

@ -0,0 +1,24 @@
# MongoDB Atlas
>[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud
> database available in AWS, Azure, and GCP. It now has support for native
> Vector Search on the MongoDB document data.
## Installation and Setup
See [detailed configuration instructions](/docs/integrations/vectorstores/mongodb_atlas).
We need to install the `pymongo` Python package.
```bash
pip install pymongo
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/mongodb_atlas).
```python
from langchain.vectorstores import MongoDBAtlasVectorSearch
```
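A rough sketch, assuming an Atlas cluster whose target collection already has a vector search index (the connection string, namespace, and index name below are placeholders; follow the linked notebook for the index definition):
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import MongoDBAtlasVectorSearch

# Placeholders: your Atlas connection string, a "db.collection" namespace, and the
# name of the vector search index configured on that collection.
vector_search = MongoDBAtlasVectorSearch.from_connection_string(
    "mongodb+srv://<user>:<password>@<cluster>.mongodb.net",
    "langchain_db.test_collection",
    OpenAIEmbeddings(),  # requires OPENAI_API_KEY
    index_name="default",
)

vector_search.add_texts(["MongoDB Atlas supports native vector search"])
print(vector_search.similarity_search("vector search", k=1))
```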

View File

@ -0,0 +1,24 @@
# Postgres Embedding
> [pg_embedding](https://github.com/neondatabase/pg_embedding) is an open-source package for
> vector similarity search using `Postgres` and the `Hierarchical Navigable Small Worlds`
> algorithm for approximate nearest neighbor search.
## Installation and Setup
We need to install several Python packages.
```bash
pip install openai
pip install psycopg2-binary
pip install tiktoken
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/pgembedding).
```python
from langchain.vectorstores import PGEmbedding
```
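A rough sketch mirroring the linked notebook; the connection string is a placeholder, a Postgres instance with the `pg_embedding` extension is required, and the keyword names are assumptions to check against the notebook.
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import PGEmbedding

# Placeholder connection string for a Postgres instance with pg_embedding enabled.
connection_string = "postgresql+psycopg2://user:password@localhost:5432/postgres"

db = PGEmbedding.from_texts(
    texts=["pg_embedding brings HNSW search to Postgres"],
    embedding=OpenAIEmbeddings(),  # requires OPENAI_API_KEY
    collection_name="langchain_demo",
    connection_string=connection_string,
)

print(db.similarity_search("HNSW in Postgres", k=1))
```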

View File

@ -0,0 +1,29 @@
# ScaNN
>[Google ScaNN](https://github.com/google-research/google-research/tree/master/scann)
> (Scalable Nearest Neighbors) is a python package.
>
>`ScaNN` is a method for efficient vector similarity search at scale.
>ScaNN includes search space pruning and quantization for Maximum Inner
> Product Search and also supports other distance functions such as
> Euclidean distance. The implementation is optimized for x86 processors
> with AVX2 support. See its [Google Research github](https://github.com/google-research/google-research/tree/master/scann)
> for more details.
## Installation and Setup
We need to install the `scann` Python package.
```bash
pip install scann
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/scann).
```python
from langchain.vectorstores import ScaNN
```
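A minimal sketch (in-memory, FAISS-like usage; assumes an `OPENAI_API_KEY` for the embeddings):
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import ScaNN

embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY

# Build an in-memory ScaNN index from a few texts.
db = ScaNN.from_texts(
    ["ScaNN prunes the search space and quantizes vectors for fast MIPS"],
    embeddings,
)

print(db.similarity_search("How does ScaNN speed up search?", k=1))
```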

View File

@ -0,0 +1,26 @@
# Supabase (Postgres)
>[Supabase](https://supabase.com/docs) is an open source `Firebase` alternative.
> `Supabase` is built on top of `PostgreSQL`, which offers strong `SQL`
> querying capabilities and enables a simple interface with already-existing tools and frameworks.
>[PostgreSQL](https://en.wikipedia.org/wiki/PostgreSQL) also known as `Postgres`,
> is a free and open-source relational database management system (RDBMS)
> emphasizing extensibility and `SQL` compliance.
## Installation and Setup
We need to install the `supabase` Python package.
```bash
pip install supabase
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/supabase).
```python
from langchain.vectorstores import SupabaseVectorStore
```
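A rough sketch, assuming a Supabase project already prepared for pgvector as in the linked notebook (the `documents` table and `match_documents` function must exist; credentials are read from placeholder environment variables):
```python
import os

from supabase.client import create_client
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import SupabaseVectorStore

# Placeholder credentials; the table and SQL function come from the notebook's setup.
supabase = create_client(os.environ["SUPABASE_URL"], os.environ["SUPABASE_SERVICE_KEY"])

db = SupabaseVectorStore.from_texts(
    ["Supabase builds on Postgres and pgvector"],
    OpenAIEmbeddings(),  # requires OPENAI_API_KEY
    client=supabase,
    table_name="documents",
    query_name="match_documents",
)

print(db.similarity_search("What is Supabase built on?", k=1))
```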

View File

@ -0,0 +1,25 @@
# USearch
>[USearch](https://unum-cloud.github.io/usearch/) is a Smaller & Faster Single-File Vector Search Engine.
>`USearch's` base functionality is identical to `FAISS`, and the interface should look
> familiar if you have ever investigated Approximate Nearest Neighbors search.
> `USearch` and `FAISS` both employ the `HNSW` algorithm, but they differ significantly
> in their design principles. `USearch` is compact and broadly compatible with FAISS without
> sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies.
>
## Installation and Setup
We need to install the `usearch` Python package.
```bash
pip install usearch
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/usearch).
```python
from langchain.vectorstores import USearch
```
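A minimal sketch (in-memory usage, analogous to the FAISS wrapper; assumes an `OPENAI_API_KEY`):
```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import USearch

embeddings = OpenAIEmbeddings()  # requires OPENAI_API_KEY

# Build an in-memory USearch index from a few texts.
db = USearch.from_texts(
    ["USearch is a compact single-file vector search engine"],
    embeddings,
)

print(db.similarity_search("What is USearch?", k=1))
```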

View File

@ -0,0 +1,28 @@
# Xata
> [Xata](https://xata.io) is a serverless data platform, based on `PostgreSQL`.
> It provides a Python SDK for interacting with your database, and a UI
> for managing your data.
> `Xata` has a native vector type, which can be added to any table, and
> supports similarity search. LangChain inserts vectors directly to `Xata`,
> and queries it for the nearest neighbors of a given vector, so that you can
> use all the LangChain Embeddings integrations with `Xata`.
## Installation and Setup
We need to install the `xata` Python package.
```bash
pip install xata==1.0.0a7
```
## Vector Store
See a [usage example](/docs/integrations/vectorstores/xata).
```python
from langchain.vectorstores import XataVectorStore
```
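A rough sketch, assuming a Xata database whose table already has a vector column sized for the embeddings (API key, database URL, and table name are placeholders, and the keyword names are assumptions to verify against the linked notebook):
```python
import os

from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import XataVectorStore

# Placeholders: a Xata API key, the database URL (including the branch), and a table
# with a vector column compatible with the embedding dimensionality.
vector_store = XataVectorStore.from_texts(
    ["Xata has a native vector column type"],
    OpenAIEmbeddings(),  # requires OPENAI_API_KEY
    api_key=os.environ["XATA_API_KEY"],
    db_url=os.environ["XATA_DB_URL"],
    table_name="vectors",
)

print(vector_store.similarity_search("native vector type", k=1))
```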

View File

@ -100,8 +100,12 @@
"source": [
"## Configure and use the Enterprise Search retriever\n",
"\n",
"The Enterprise Search retriever is implemented in the `langchain.retriever.GoogleCloudEntepriseSearchRetriever` class. The `get_relevan_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated with either an `extractive segment` or an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of a document from which the segments or answers were extracted.\n",
"The Enterprise Search retriever is implemented in the `langchain.retriever.GoogleCloudEntepriseSearchRetriever` class. The `get_relevant_documents` method returns a list of `langchain.schema.Document` documents where the `page_content` field of each document is populated the document content.\n",
"Depending on the data type used in Enterprise search (structured or unstructured) the `page_content` field is populated as follows:\n",
"- Structured data source: either an `extractive segment` or an `extractive answer` that matches a query. The `metadata` field is populated with metadata (if any) of the document from which the segments or answers were extracted.\n",
"- Unstructured data source: a string json containing all the fields returned from the structured data source. The `metadata` field is populated with metadata (if any) of the document \n",
"\n",
"### Only for Unstructured data sources:\n",
"An extractive answer is verbatim text that is returned with each search result. It is extracted directly from the original document. Extractive answers are typically displayed near the top of web pages to provide an end user with a brief answer that is contextually relevant to their query. Extractive answers are available for website and unstructured search.\n",
"\n",
"An extractive segment is verbatim text that is returned with each search result. An extractive segment is usually more verbose than an extractive answer. Extractive segments can be displayed as an answer to a query, and can be used to perform post-processing tasks and as input for large language models to generate answers or new text. Extractive segments are available for unstructured search.\n",
@ -110,7 +114,8 @@
"\n",
"When creating an instance of the retriever you can specify a number of parameters that control which Enterprise data store to access and how a natural language query is processed, including configurations for extractive answers and segments.\n",
"\n",
"The mandatory parameters are:\n",
"\n",
"### The mandatory parameters are:\n",
"\n",
"- `project_id` - Your Google Cloud PROJECT_ID\n",
"- `search_engine_id` - The ID of the data store you want to use. \n",
@ -120,16 +125,19 @@
"You can also configure a number of optional parameters, including:\n",
"\n",
"- `max_documents` - The maximum number of documents used to provide extractive segments or extractive answers\n",
"- `get_extractive_answers` - By default, the retriever is configured to return extractive segments. Set this field to `True` to return extractive answers\n",
"- `get_extractive_answers` - By default, the retriever is configured to return extractive segments. Set this field to `True` to return extractive answers. This is used only when `engine_data_type` set to 0 (unstructured) \n",
"- `max_extractive_answer_count` - The maximum number of extractive answers returned in each search result.\n",
" At most 5 answers will be returned\n",
" At most 5 answers will be returned. This is used only when `engine_data_type` set to 0 (unstructured) \n",
"- `max_extractive_segment_count` - The maximum number of extractive segments returned in each search result.\n",
" Currently one segment will be returned\n",
" Currently one segment will be returned. This is used only when `engine_data_type` set to 0 (unstructured) \n",
"- `filter` - The filter expression that allows you filter the search results based on the metadata associated with the documents in the searched data store. \n",
"- `query_expansion_condition` - Specification to determine under which conditions query expansion should occur.\n",
" 0 - Unspecified query expansion condition. In this case, server behavior defaults to disabled.\n",
" 1 - Disabled query expansion. Only the exact search query is used, even if SearchResponse.total_size is zero.\n",
" 2 - Automatic query expansion built by the Search API.\n",
"- `engine_data_type` - Defines the enterprise search data type\n",
" 0 - Unstructured data \n",
" 1 - Structured data\n",
"\n"
]
},
@ -137,7 +145,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure and use the retriever with extractve segments"
"### Configure and use the retriever for **unstructured** data with extractve segments "
]
},
{
@ -182,7 +190,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure and use the retriever with extractve answers "
"### Configure and use the retriever for **unstructured** data with extractve answers "
]
},
{
@ -213,12 +221,30 @@
" print(doc)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Configure and use the retriever for **structured** data with extractve answers "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
"source": [
"retriever = GoogleCloudEnterpriseSearchRetriever(\n",
" project_id=PROJECT_ID,\n",
" search_engine_id=SEARCH_ENGINE_ID,\n",
" max_documents=3,\n",
" engine_data_type=1\n",
")\n",
"\n",
"result = retriever.get_relevant_documents(query)\n",
"for doc in result:\n",
" print(doc)"
]
}
],
"metadata": {

View File

@ -0,0 +1,461 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"# AINetwork Toolkit\n",
"\n",
"The AINetwork Toolkit is a set of tools for interacting with the AINetwork Blockchain. These tools allow you to transfer AIN, read and write values, create apps, and set permissions for specific paths within the blockchain database."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Installing dependencies\n",
"\n",
"Before using the AINetwork Toolkit, you need to install the ain-py package. You can install it with pip:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!pip install ain-py"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Set environmental variables\n",
"\n",
"You need to set the `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` environmental variable to your AIN Blockchain Account Private Key."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"] = \"\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get AIN Blockchain private key"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
"private_key: f5e2f359bb6b7836a2ac70815473d1a290c517f847d096f5effe818de8c2cf14\n",
"\n"
]
}
],
"source": [
"import os\n",
"\n",
"from ain.account import Account\n",
"\n",
"if os.environ.get(\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\", None):\n",
" account = Account(os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"])\n",
"else:\n",
" account = Account.create()\n",
" os.environ[\"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY\"] = account.private_key\n",
" print(\n",
" f\"\"\"\n",
"address: {account.address}\n",
"private_key: {account.private_key}\n",
"\"\"\"\n",
" )\n",
"# IMPORTANT: If you plan to use this account in the future, make sure to save the\n",
"# private key in a secure place. Losing access to your private key means losing\n",
"# access to your account."
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize the AINetwork Toolkit\n",
"\n",
"You can initialize the AINetwork Toolkit like this:"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit\n",
"\n",
"toolkit = AINetworkToolkit()\n",
"tools = toolkit.get_tools()\n",
"address = tools[0].interface.wallet.defaultAccount.address"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Initialize the Agent with the AINetwork Toolkit\n",
"\n",
"You can initialize the agent with the AINetwork Toolkit like this:"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from langchain.chat_models import ChatOpenAI\n",
"from langchain.agents import initialize_agent, AgentType\n",
"\n",
"llm = ChatOpenAI(temperature=0)\n",
"agent = initialize_agent(\n",
" tools=tools,\n",
" llm=llm,\n",
" verbose=True,\n",
" agent=AgentType.OPENAI_FUNCTIONS,\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"## Example Usage\n",
"\n",
"Here are some examples of how you can use the agent with the AINetwork Toolkit:"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Define App name to test"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"appName = f\"langchain_demo_{address.lower()}\""
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Create an app in the AINetwork Blockchain database"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `AINappOps` with `{'type': 'SET_ADMIN', 'appName': 'langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3m{\"tx_hash\": \"0x018846d6a9fc111edb1a2246ae2484ef05573bd2c584f3d0da155fa4b4936a9e\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 4002, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 2}}, \"state\": {\"service\": 1640}}, \"gas_cost_total\": 0, \"func_results\": {\"_createApp\": {\"op_results\": {\"0\": {\"path\": \"/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"1\": {\"path\": \"/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"2\": {\"path\": \"/manage_app/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/config/admin\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}}, \"code\": 0, \"bandwidth_gas_amount\": 2000}}, \"code\": 0, \"bandwidth_gas_amount\": 2001, \"gas_amount_charged\": 5642}}\u001b[0m\u001b[32;1m\u001b[1;3mThe app with the name \"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\" has been created in the AINetwork Blockchain database.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The app with the name \"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\" has been created in the AINetwork Blockchain database.\n"
]
}
],
"source": [
"print(\n",
" agent.run(\n",
" f\"Create an app in the AINetwork Blockchain database with the name {appName}\"\n",
" )\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set a value at a given path in the AINetwork Blockchain database"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `AINvalueOps` with `{'type': 'SET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object', 'value': {'1': 2, '34': 56}}`\n",
"\n",
"\n",
"\u001b[0m\u001b[33;1m\u001b[1;3m{\"tx_hash\": \"0x3d1a16d9808830088cdf4d37f90f4b1fa1242e2d5f6f983829064f45107b5279\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 1}}, \"state\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 674}}}, \"gas_cost_total\": 0, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 0}}\u001b[0m\u001b[32;1m\u001b[1;3mThe value {1: 2, '34': 56} has been set at the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The value {1: 2, '34': 56} has been set at the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/object.\n"
]
}
],
"source": [
"print(\n",
" agent.run(f\"Set the value {{1: 2, '34': 56}} at the path /apps/{appName}/object .\")\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Set permissions for a path in the AINetwork Blockchain database"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `AINruleOps` with `{'type': 'SET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from', 'eval': 'auth.addr===$from'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[38;5;200m\u001b[1;3m{\"tx_hash\": \"0x37d5264e580f6a217a347059a735bfa9eb5aad85ff28a95531c6dc09252664d2\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 1}}, \"state\": {\"service\": 0, \"app\": {\"langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac\": 712}}}, \"gas_cost_total\": 0, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 0}}\u001b[0m\u001b[32;1m\u001b[1;3mThe write permissions for the path `/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from` have been set with the eval string `auth.addr===$from`.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The write permissions for the path `/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac/user/$from` have been set with the eval string `auth.addr===$from`.\n"
]
}
],
"source": [
"print(\n",
" agent.run(\n",
" f\"Set the write permissions for the path /apps/{appName}/user/$from with the\"\n",
" \" eval string auth.addr===$from .\"\n",
" )\n",
")"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Retrieve the permissions for a path in the AINetwork Blockchain database"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `AINownerOps` with `{'type': 'GET', 'path': '/apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[33;1m\u001b[1;3m{\".owner\": {\"owners\": {\"0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\": {\"branch_owner\": true, \"write_function\": true, \"write_owner\": true, \"write_rule\": true}}}}\u001b[0m\u001b[32;1m\u001b[1;3mThe permissions for the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac are as follows:\n",
"\n",
"- Address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
" - branch_owner: true\n",
" - write_function: true\n",
" - write_owner: true\n",
" - write_rule: true\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The permissions for the path /apps/langchain_demo_0x5beb4defa2ccc274498416fd7cb34235dbc122ac are as follows:\n",
"\n",
"- Address: 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac\n",
" - branch_owner: true\n",
" - write_function: true\n",
" - write_owner: true\n",
" - write_rule: true\n"
]
}
],
"source": [
"print(agent.run(f\"Retrieve the permissions for the path /apps/{appName}.\"))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get AIN from faucet"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\"result\":\"0x0eb07b67b7d0a702cb60e865d3deafff3070d8508077ef793d69d6819fd92ea3\",\"time\":1692348112376}"
]
}
],
"source": [
"!curl http://faucet.ainetwork.ai/api/test/{address}/"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get AIN Balance"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `AINvalueOps` with `{'type': 'GET', 'path': '/accounts/0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac/balance'}`\n",
"\n",
"\n",
"\u001b[0m\u001b[33;1m\u001b[1;3m100\u001b[0m\u001b[32;1m\u001b[1;3mThe AIN balance of address 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac is 100 AIN.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The AIN balance of address 0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac is 100 AIN.\n"
]
}
],
"source": [
"print(agent.run(f\"Check AIN balance of {address}\"))"
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"### Transfer AIN"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"\n",
"\u001b[1m> Entering new AgentExecutor chain...\u001b[0m\n",
"\u001b[32;1m\u001b[1;3m\n",
"Invoking: `AINtransfer` with `{'address': '0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b', 'amount': 100}`\n",
"\n",
"\n",
"\u001b[0m\u001b[36;1m\u001b[1;3m{\"tx_hash\": \"0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e\", \"result\": {\"gas_amount_total\": {\"bandwidth\": {\"service\": 3}, \"state\": {\"service\": 866}}, \"gas_cost_total\": 0, \"func_results\": {\"_transfer\": {\"op_results\": {\"0\": {\"path\": \"/accounts/0x5BEB4Defa2ccc274498416Fd7Cb34235DbC122Ac/balance\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}, \"1\": {\"path\": \"/accounts/0x19937B227b1b13f29e7AB18676a89EA3BDEA9C5b/balance\", \"result\": {\"code\": 0, \"bandwidth_gas_amount\": 1}}}, \"code\": 0, \"bandwidth_gas_amount\": 0}}, \"code\": 0, \"bandwidth_gas_amount\": 1, \"gas_amount_charged\": 869}}\u001b[0m\u001b[32;1m\u001b[1;3mThe transfer of 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b was successful. The transaction hash is 0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e.\u001b[0m\n",
"\n",
"\u001b[1m> Finished chain.\u001b[0m\n",
"The transfer of 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b was successful. The transaction hash is 0xa59d15d23373bcc00e413ac8ba18cb016bb3bdd54058d62606aec688c6ad3d2e.\n"
]
}
],
"source": [
"print(\n",
" agent.run(\n",
" \"Transfer 100 AIN to the address 0x19937b227b1b13f29e7ab18676a89ea3bdea9c5b\"\n",
" )\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}

View File

@ -5,7 +5,7 @@
"id": "683953b3",
"metadata": {},
"source": [
"# ClickHouse Vector Search\n",
"# ClickHouse\n",
"\n",
"> [ClickHouse](https://clickhouse.com/) is the fastest and most resource efficient open-source database for real-time apps and analytics with full SQL support and a wide range of functions to assist users in writing analytical queries. Lately added data structures and distance search functions (like `L2Distance`) as well as [approximate nearest neighbor search indexes](https://clickhouse.com/docs/en/engines/table-engines/mergetree-family/annindexes) enable ClickHouse to be used as a high performance and scalable vector database to store and search vectors with SQL.\n",
"\n",
@ -198,8 +198,7 @@
"ExecuteTime": {
"end_time": "2023-06-03T08:28:58.252991Z",
"start_time": "2023-06-03T08:28:58.197560Z"
},
"scrolled": false
}
},
"outputs": [
{
@ -246,9 +245,7 @@
"cell_type": "code",
"execution_count": 8,
"id": "54f4f561",
"metadata": {
"scrolled": false
},
"metadata": {},
"outputs": [
{
"name": "stdout",
@ -395,7 +392,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.2"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -1,20 +1,18 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "2ce41f46-5711-4311-b04d-2fe233ac5b1b",
"metadata": {},
"source": [
"# DocArrayHnswSearch\n",
"# DocArray HnswSearch\n",
"\n",
">[DocArrayHnswSearch](https://docs.docarray.org/user_guide/storing/index_hnswlib/) is a lightweight Document Index implementation provided by [Docarray](https://docs.docarray.org/) that runs fully locally and is best suited for small- to medium-sized datasets. It stores vectors on disk in [hnswlib](https://github.com/nmslib/hnswlib), and stores all other data in [SQLite](https://www.sqlite.org/index.html).\n",
">[DocArrayHnswSearch](https://docs.docarray.org/user_guide/storing/index_hnswlib/) is a lightweight Document Index implementation provided by [Docarray](https://github.com/docarray/docarray) that runs fully locally and is best suited for small- to medium-sized datasets. It stores vectors on disk in [hnswlib](https://github.com/nmslib/hnswlib), and stores all other data in [SQLite](https://www.sqlite.org/index.html).\n",
"\n",
"This notebook shows how to use functionality related to the `DocArrayHnswSearch`."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7ee37d28",
"metadata": {},
@ -57,7 +55,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "8dbb6de2",
"metadata": {
@ -103,7 +100,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "ed6f905b-4853-4a44-9730-614aa8e22b78",
"metadata": {},
@ -151,7 +147,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "3febb987-e903-416f-af26-6897d84c8d61",
"metadata": {},
@ -160,7 +155,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "bb1df11a",
"metadata": {},
@ -236,7 +230,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -1,20 +1,18 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "a3afefb0-7e99-4912-a222-c6b186da11af",
"metadata": {},
"source": [
"# DocArrayInMemorySearch\n",
"# DocArray InMemorySearch\n",
"\n",
">[DocArrayInMemorySearch](https://docs.docarray.org/user_guide/storing/index_in_memory/) is a document index provided by [Docarray](https://docs.docarray.org/) that stores documents in memory. It is a great starting point for small datasets, where you may not want to launch a database server.\n",
">[DocArrayInMemorySearch](https://docs.docarray.org/user_guide/storing/index_in_memory/) is a document index provided by [Docarray](https://github.com/docarray/docarray) that stores documents in memory. It is a great starting point for small datasets, where you may not want to launch a database server.\n",
"\n",
"This notebook shows how to use functionality related to the `DocArrayInMemorySearch`."
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "5031a3ec",
"metadata": {},
@ -56,7 +54,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "6e57a389-f637-4b8f-9ab2-759ae7485f78",
"metadata": {},
@ -98,7 +95,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "efbb6684-3846-4332-a624-ddd4d75844c1",
"metadata": {},
@ -146,7 +142,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "43896697-f99e-47b6-9117-47a25e9afa9c",
"metadata": {},
@ -155,7 +150,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "414a9bc9",
"metadata": {},
@ -224,7 +218,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -5,7 +5,7 @@
"id": "683953b3",
"metadata": {},
"source": [
"# FAISS\n",
"# Faiss\n",
"\n",
">[Facebook AI Similarity Search (Faiss)](https://engineering.fb.com/2017/03/29/data-infrastructure/faiss-a-library-for-efficient-similarity-search/) is a library for efficient similarity search and clustering of dense vectors. It contains algorithms that search in sets of vectors of any size, up to ones that possibly do not fit in RAM. It also contains supporting code for evaluation and parameter tuning.\n",
"\n",
@ -596,7 +596,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.17"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -5,9 +5,9 @@
"id": "655b8f55-2089-4733-8b09-35dea9580695",
"metadata": {},
"source": [
"# MatchingEngine\n",
"# Google Vertex AI MatchingEngine\n",
"\n",
"This notebook shows how to use functionality related to the GCP Vertex AI `MatchingEngine` vector database.\n",
"This notebook shows how to use functionality related to the `GCP Vertex AI MatchingEngine` vector database.\n",
"\n",
"> Vertex AI [Matching Engine](https://cloud.google.com/vertex-ai/docs/matching-engine/overview) provides the industry's leading high-scale low latency vector database. These vector databases are commonly referred to as vector similarity-matching or an approximate nearest neighbor (ANN) service.\n",
"\n",
@ -348,7 +348,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -197,7 +197,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -205,7 +204,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -229,7 +227,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
@ -298,9 +295,9 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 2
"nbformat_minor": 4
}

View File

@ -1,14 +1,13 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "683953b3",
"metadata": {},
"source": [
"# MongoDB Atlas\n",
"\n",
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS , Azure, and GCP. It now has support for native Vector Search on your MongoDB document data.\n",
">[MongoDB Atlas](https://www.mongodb.com/docs/atlas/) is a fully-managed cloud database available in AWS, Azure, and GCP. It now has support for native Vector Search on your MongoDB document data.\n",
"\n",
"This notebook shows how to use `MongoDB Atlas Vector Search` to store your embeddings in MongoDB documents, create a vector search index, and perform KNN search with an approximate nearest neighbor algorithm.\n",
"\n",
@ -44,7 +43,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "457ace44-1d95-4001-9dd5-78811ab208ad",
"metadata": {},
@ -63,7 +61,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "1f3ecc42",
"metadata": {},
@ -147,7 +144,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "851a2ec9-9390-49a4-8412-3e132c9f789d",
"metadata": {},
@ -191,7 +187,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -1,18 +1,17 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "1292f057",
"metadata": {},
"source": [
"# pg_embedding\n",
"# Postgres Embedding\n",
"\n",
"> [pg_embedding](https://github.com/neondatabase/pg_embedding) is an open-source vector similarity search for `Postgres` that uses Hierarchical Navigable Small Worlds for approximate nearest neighbor search.\n",
"> [Postgres Embedding](https://github.com/neondatabase/pg_embedding) is an open-source vector similarity search for `Postgres` that uses `Hierarchical Navigable Small Worlds (HNSW)` for approximate nearest neighbor search.\n",
"\n",
"It supports:\n",
"- exact and approximate nearest neighbor search using HNSW\n",
"- L2 distance\n",
">It supports:\n",
">- exact and approximate nearest neighbor search using HNSW\n",
">- L2 distance\n",
"\n",
"This notebook shows how to use the Postgres vector database (`PGEmbedding`).\n",
"\n",
@ -36,7 +35,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "b2e49694",
"metadata": {},
@ -158,7 +156,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7ef7b052",
"metadata": {},
@ -167,7 +164,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "939151f7",
"metadata": {},
@ -192,7 +188,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "f9510e6b",
"metadata": {},
@ -214,7 +209,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "7adacf29",
"metadata": {},
@ -236,7 +230,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "528893fb",
"metadata": {},
@ -330,7 +323,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -182,7 +182,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.11"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -1,7 +1,6 @@
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"id": "683953b3",
"metadata": {},
@ -10,7 +9,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "cc80fa84-1f2f-48b4-bd39-3e6412f012f1",
"metadata": {},
@ -87,7 +85,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "69bff365-3039-4ff8-a641-aa190166179d",
"metadata": {},
@ -237,7 +234,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "18152965",
"metadata": {},
@ -246,7 +242,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "ea13e80a",
"metadata": {},
@ -287,7 +282,6 @@
]
},
{
"attachments": {},
"cell_type": "markdown",
"id": "794a7552",
"metadata": {},
@ -439,7 +433,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.6"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -8,7 +8,7 @@
"# USearch\n",
">[USearch](https://unum-cloud.github.io/usearch/) is a Smaller & Faster Single-File Vector Search Engine\n",
"\n",
"USearch's base functionality is identical to FAISS, and the interface should look familiar if you have ever investigated Approximate Nearest Neigbors search. FAISS is a widely recognized standard for high-performance vector search engines. USearch and FAISS both employ the same HNSW algorithm, but they differ significantly in their design principles. USearch is compact and broadly compatible without sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies."
">USearch's base functionality is identical to FAISS, and the interface should look familiar if you have ever investigated Approximate Nearest Neigbors search. FAISS is a widely recognized standard for high-performance vector search engines. USearch and FAISS both employ the same HNSW algorithm, but they differ significantly in their design principles. USearch is compact and broadly compatible without sacrificing performance, with a primary focus on user-defined metrics and fewer dependencies."
]
},
{
@ -187,7 +187,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.1"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -232,7 +232,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.9"
"version": "3.10.12"
}
},
"nbformat": 4,

View File

@ -6,7 +6,6 @@ from __future__ import annotations
import json
from typing import Any, ClassVar, Dict, List, Optional, Type
import pydantic
from langchain.base_language import BaseLanguageModel
from langchain.callbacks.manager import CallbackManagerForChainRun
from langchain.chains.base import Chain
@ -14,6 +13,7 @@ from langchain.chains.llm import LLMChain
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.prompt import PromptTemplate
from langchain_experimental import pydantic_v1 as pydantic
from langchain_experimental.cpal.constants import Constant
from langchain_experimental.cpal.models import (
CausalModel,

View File

@ -1,3 +1,4 @@
import typing
from importlib import metadata
## Create namespaces for pydantic v1 and v2.
@ -11,11 +12,19 @@ from importlib import metadata
# unambiguously uses either v1 or v2 API.
# * This change is easier to roll out and roll back.
try:
from pydantic.v1 import * # noqa: F403
except ImportError:
# It's currently impossible to support mypy for both pydantic v1 and v2 at once:
# https://github.com/pydantic/pydantic/issues/6022
#
# In the lint environment, pydantic is currently v1.
# When we upgrade it to pydantic v2, we'll need
# to replace this with `from pydantic.v1 import *`.
if typing.TYPE_CHECKING:
from pydantic import * # noqa: F403
else:
try:
from pydantic.v1 import * # noqa: F403
except ImportError:
from pydantic import * # noqa: F403
try:
_PYDANTIC_MAJOR_VERSION: int = int(metadata.version("pydantic").split(".")[0])
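
For orientation, a minimal sketch of how consumer code is expected to use this compatibility namespace; the model below is hypothetical and only illustrates that the same declarations work under both pydantic 1.x and 2.x:

```python
# Sketch only: models are declared against the v1-compatible namespace.
from langchain_experimental import pydantic_v1 as pydantic


class StoryOutcome(pydantic.BaseModel):  # hypothetical model for illustration
    title: str
    score: float = pydantic.Field(..., ge=0.0, le=1.0)


print(StoryOutcome(title="demo", score=0.5))
```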

View File

@ -1,4 +1,15 @@
try:
from pydantic.v1.dataclasses import * # noqa: F403
except ImportError:
import typing
# It's currently impossible to support mypy for both pydantic v1 and v2 at once:
# https://github.com/pydantic/pydantic/issues/6022
#
# In the lint environment, pydantic is currently v1.
# When we upgrade it to pydantic v2, we'll need to
# replace this with `from pydantic.v1.dataclasses import *`.
if typing.TYPE_CHECKING:
from pydantic.dataclasses import * # noqa: F403
else:
try:
from pydantic.v1.dataclasses import * # noqa: F403
except ImportError:
from pydantic.dataclasses import * # noqa: F403

View File

@ -1,4 +1,15 @@
try:
from pydantic.v1.main import * # noqa: F403
except ImportError:
import typing
# It's currently impossible to support mypy for both pydantic v1 and v2 at once:
# https://github.com/pydantic/pydantic/issues/6022
#
# In the lint environment, pydantic is currently v1.
# When we upgrade it to pydantic v2, we'll need
# to replace this with `from pydantic.v1.main import *`.
if typing.TYPE_CHECKING:
from pydantic.main import * # noqa: F403
else:
try:
from pydantic.v1.main import * # noqa: F403
except ImportError:
from pydantic.main import * # noqa: F403

File diff suppressed because it is too large.

View File

@ -5,12 +5,12 @@ import unittest
from typing import Type
from unittest import mock
import pydantic
import pytest
from langchain import OpenAI
from langchain.output_parsers import PydanticOutputParser
from langchain.prompts.prompt import PromptTemplate
from langchain_experimental import pydantic_v1 as pydantic
from langchain_experimental.cpal.base import (
CausalChain,
CPALChain,

View File

@ -1,4 +1,5 @@
"""Agent toolkits."""
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
from langchain.agents.agent_toolkits.amadeus.toolkit import AmadeusToolkit
from langchain.agents.agent_toolkits.azure_cognitive_services import (
AzureCognitiveServicesToolkit,
@ -46,6 +47,7 @@ from langchain.agents.agent_toolkits.xorbits.base import create_xorbits_agent
from langchain.agents.agent_toolkits.zapier.toolkit import ZapierToolkit
__all__ = [
"AINetworkToolkit",
"AmadeusToolkit",
"AzureCognitiveServicesToolkit",
"FileManagementToolkit",

View File

@ -0,0 +1 @@
"""AINetwork toolkit."""

View File

@ -0,0 +1,45 @@
from __future__ import annotations
from typing import TYPE_CHECKING, List, Literal, Optional
from langchain.agents.agent_toolkits.base import BaseToolkit
from langchain.pydantic_v1 import root_validator
from langchain.tools import BaseTool
from langchain.tools.ainetwork.app import AINAppOps
from langchain.tools.ainetwork.owner import AINOwnerOps
from langchain.tools.ainetwork.rule import AINRuleOps
from langchain.tools.ainetwork.transfer import AINTransfer
from langchain.tools.ainetwork.utils import authenticate
from langchain.tools.ainetwork.value import AINValueOps
if TYPE_CHECKING:
from ain.ain import Ain
class AINetworkToolkit(BaseToolkit):
"""Toolkit for interacting with AINetwork Blockchain."""
network: Optional[Literal["mainnet", "testnet"]] = "testnet"
interface: Optional[Ain] = None
@root_validator(pre=True)
def set_interface(cls, values: dict) -> dict:
if not values.get("interface"):
values["interface"] = authenticate(network=values.get("network", "testnet"))
return values
class Config:
"""Pydantic config."""
validate_all = True
arbitrary_types_allowed = True
def get_tools(self) -> List[BaseTool]:
"""Get the tools in the toolkit."""
return [
AINAppOps(),
AINOwnerOps(),
AINRuleOps(),
AINTransfer(),
AINValueOps(),
]
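
A hedged usage sketch for the new toolkit, mirroring the AINetwork notebook earlier in this diff; it assumes `ain-py` is installed, `AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY` is set, and an OpenAI key is available:

```python
# Sketch only: wiring the AINetworkToolkit into an OpenAI-functions agent.
from langchain.agents import AgentType, initialize_agent
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
from langchain.chat_models import ChatOpenAI

toolkit = AINetworkToolkit(network="testnet")  # authenticate() builds the Ain interface
llm = ChatOpenAI(model="gpt-4", temperature=0)
agent = initialize_agent(
    tools=toolkit.get_tools(),  # AINAppOps, AINOwnerOps, AINRuleOps, AINTransfer, AINValueOps
    llm=llm,
    agent=AgentType.OPENAI_FUNCTIONS,
    verbose=True,
)
print(agent.run("Check the AIN balance of the current session's address."))
```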

View File

@ -176,22 +176,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC):
dumpd(self), [messages], invocation_params=params, options=options
)
try:
message: Optional[BaseMessageChunk] = None
generation: Optional[ChatGenerationChunk] = None
for chunk in self._stream(
messages, stop=stop, run_manager=run_manager, **kwargs
):
yield chunk.message
if message is None:
message = chunk.message
if generation is None:
generation = chunk
else:
message += chunk.message
assert message is not None
generation += chunk
assert generation is not None
except (KeyboardInterrupt, Exception) as e:
run_manager.on_llm_error(e)
raise e
else:
run_manager.on_llm_end(
LLMResult(generations=[[ChatGeneration(message=message)]]),
LLMResult(generations=[[generation]]),
)
async def astream(
@ -223,22 +223,22 @@ class BaseChatModel(BaseLanguageModel[BaseMessageChunk], ABC):
dumpd(self), [messages], invocation_params=params, options=options
)
try:
message: Optional[BaseMessageChunk] = None
generation: Optional[ChatGenerationChunk] = None
async for chunk in self._astream(
messages, stop=stop, run_manager=run_manager, **kwargs
):
yield chunk.message
if message is None:
message = chunk.message
if generation is None:
generation = chunk
else:
message += chunk.message
assert message is not None
generation += chunk
assert generation is not None
except (KeyboardInterrupt, Exception) as e:
await run_manager.on_llm_error(e)
raise e
else:
await run_manager.on_llm_end(
LLMResult(generations=[[ChatGeneration(message=message)]]),
LLMResult(generations=[[generation]]),
)
# --- Custom methods ---
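
For reference, a small sketch of the streaming path that exercises this aggregation (model choice and prompt are placeholders; an OpenAI key is assumed):

```python
# Sketch only: streaming chunks from a chat model; the on_llm_end callback now
# receives the aggregated ChatGenerationChunk rather than a bare message.
from langchain.chat_models import ChatOpenAI
from langchain.schema import HumanMessage

chat = ChatOpenAI(streaming=True, temperature=0)
for chunk in chat.stream([HumanMessage(content="Name three vector databases.")]):
    print(chunk.content, end="", flush=True)
```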

View File

@ -190,9 +190,6 @@ def _convert_message_to_dict(message: BaseMessage) -> dict:
class ChatLiteLLM(BaseChatModel):
"""`LiteLLM` Chat models API.
To use you must have the google.generativeai Python package installed and
either:
1. The ``GOOGLE_API_KEY``` environment variable set with your API key, or
2. Pass your API key using the google_api_key kwarg to the ChatGoogle
constructor.
@ -206,7 +203,8 @@ class ChatLiteLLM(BaseChatModel):
"""
client: Any #: :meta private:
model_name: str = "gpt-3.5-turbo"
model: str = "gpt-3.5-turbo"
model_name: Optional[str] = None
"""Model name to use."""
openai_api_key: Optional[str] = None
azure_api_key: Optional[str] = None
@ -217,8 +215,9 @@ class ChatLiteLLM(BaseChatModel):
streaming: bool = False
api_base: Optional[str] = None
organization: Optional[str] = None
custom_llm_provider: Optional[str] = None
request_timeout: Optional[Union[float, Tuple[float, float]]] = None
temperature: Optional[float] = None
temperature: Optional[float] = 1
model_kwargs: Dict[str, Any] = Field(default_factory=dict)
"""Run inference with this temperature. Must by in the closed
interval [0.0, 1.0]."""
@ -238,8 +237,11 @@ class ChatLiteLLM(BaseChatModel):
@property
def _default_params(self) -> Dict[str, Any]:
"""Get the default parameters for calling OpenAI API."""
set_model_value = self.model
if self.model_name is not None:
set_model_value = self.model_name
return {
"model": self.model_name,
"model": set_model_value,
"force_timeout": self.request_timeout,
"max_tokens": self.max_tokens,
"stream": self.streaming,
@ -251,10 +253,13 @@ class ChatLiteLLM(BaseChatModel):
@property
def _client_params(self) -> Dict[str, Any]:
"""Get the parameters used for the openai client."""
set_model_value = self.model
if self.model_name is not None:
set_model_value = self.model_name
self.client.api_base = self.api_base
self.client.organization = self.organization
creds: Dict[str, Any] = {
"model": self.model_name,
"model": set_model_value,
"force_timeout": self.request_timeout,
}
return {**self._default_params, **creds}
@ -347,7 +352,10 @@ class ChatLiteLLM(BaseChatModel):
)
generations.append(gen)
token_usage = response.get("usage", {})
llm_output = {"token_usage": token_usage, "model_name": self.model_name}
set_model_value = self.model
if self.model_name is not None:
set_model_value = self.model_name
llm_output = {"token_usage": token_usage, "model": set_model_value}
return ChatResult(generations=generations, llm_output=llm_output)
def _create_message_dicts(
@ -437,8 +445,11 @@ class ChatLiteLLM(BaseChatModel):
@property
def _identifying_params(self) -> Dict[str, Any]:
"""Get the identifying parameters."""
set_model_value = self.model
if self.model_name is not None:
set_model_value = self.model_name
return {
"model_name": self.model_name,
"model": set_model_value,
"temperature": self.temperature,
"top_p": self.top_p,
"top_k": self.top_k,

View File

@ -132,6 +132,7 @@ from langchain.document_loaders.pdf import (
PyPDFLoader,
UnstructuredPDFLoader,
)
from langchain.document_loaders.polars_dataframe import PolarsDataFrameLoader
from langchain.document_loaders.powerpoint import UnstructuredPowerPointLoader
from langchain.document_loaders.psychic import PsychicLoader
from langchain.document_loaders.pubmed import PubMedLoader
@ -299,6 +300,7 @@ __all__ = [
"PDFPlumberLoader",
"PagedPDFSplitter",
"PlaywrightURLLoader",
"PolarsDataFrameLoader",
"PsychicLoader",
"PubMedLoader",
"PyMuPDFLoader",

View File

@ -20,16 +20,14 @@ logger = logging.getLogger(__name__)
class ContentFormat(str, Enum):
"""Enumerator of the content formats of Confluence page."""
EDITOR = "body.editor"
EXPORT_VIEW = "body.export_view"
ANONYMOUS_EXPORT_VIEW = "body.anonymous_export_view"
STORAGE = "body.storage"
VIEW = "body.view"
def get_content(self, page: dict) -> str:
if self == ContentFormat.STORAGE:
return page["body"]["storage"]["value"]
elif self == ContentFormat.VIEW:
return page["body"]["view"]["value"]
raise ValueError("unknown content format")
return page["body"][self.name.lower()]["value"]
class ConfluenceLoader(BaseLoader):
@ -52,7 +50,10 @@ class ConfluenceLoader(BaseLoader):
raw XML representation for storage. The view format is the HTML representation for
viewing, with macros rendered as though the page is viewed by users. You can pass
an enum `content_format` argument to `load()` to specify the content format; this is
set to `ContentFormat.STORAGE` by default.
set to `ContentFormat.STORAGE` by default, the supported values are:
`ContentFormat.EDITOR`, `ContentFormat.EXPORT_VIEW`,
`ContentFormat.ANONYMOUS_EXPORT_VIEW`, `ContentFormat.STORAGE`,
and `ContentFormat.VIEW`.
Hint: space_key and page_id can both be found in the URL of a page in Confluence
- https://yoursite.atlassian.com/wiki/spaces/<space_key>/pages/<page_id>
@ -238,7 +239,11 @@ class ConfluenceLoader(BaseLoader):
:type include_attachments: bool, optional
:param include_comments: defaults to False
:type include_comments: bool, optional
:param content_format: Specify content format, defaults to ContentFormat.STORAGE
:param content_format: Specify content format, defaults to
ContentFormat.STORAGE, the supported values are:
`ContentFormat.EDITOR`, `ContentFormat.EXPORT_VIEW`,
`ContentFormat.ANONYMOUS_EXPORT_VIEW`,
`ContentFormat.STORAGE`, and `ContentFormat.VIEW`.
:type content_format: ContentFormat
:param limit: Maximum number of pages to retrieve per request, defaults to 50
:type limit: int, optional
@ -473,14 +478,12 @@ class ConfluenceLoader(BaseLoader):
else:
attachment_texts = []
content = content_format.get_content(page)
if keep_markdown_format:
# Use markdownify to keep the page Markdown style
text = markdownify(
page["body"]["storage"]["value"], heading_style="ATX"
) + "".join(attachment_texts)
text = markdownify(content, heading_style="ATX") + "".join(attachment_texts)
else:
content = content_format.get_content(page)
if keep_newlines:
text = BeautifulSoup(
content.replace("</p>", "\n</p>").replace("<br />", "\n"), "lxml"

View File

@ -4,23 +4,15 @@ from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
class DataFrameLoader(BaseLoader):
"""Load `Pandas` DataFrame."""
def __init__(self, data_frame: Any, page_content_column: str = "text"):
class BaseDataFrameLoader(BaseLoader):
def __init__(self, data_frame: Any, *, page_content_column: str = "text"):
"""Initialize with dataframe object.
Args:
data_frame: Pandas DataFrame object.
data_frame: DataFrame object.
page_content_column: Name of the column containing the page content.
Defaults to "text".
"""
import pandas as pd
if not isinstance(data_frame, pd.DataFrame):
raise ValueError(
f"Expected data_frame to be a pd.DataFrame, got {type(data_frame)}"
)
self.data_frame = data_frame
self.page_content_column = page_content_column
@ -36,3 +28,28 @@ class DataFrameLoader(BaseLoader):
def load(self) -> List[Document]:
"""Load full dataframe."""
return list(self.lazy_load())
class DataFrameLoader(BaseDataFrameLoader):
"""Load `Pandas` DataFrame."""
def __init__(self, data_frame: Any, page_content_column: str = "text"):
"""Initialize with dataframe object.
Args:
data_frame: Pandas DataFrame object.
page_content_column: Name of the column containing the page content.
Defaults to "text".
"""
try:
import pandas as pd
except ImportError as e:
raise ImportError(
"Unable to import pandas, please install with `pip install pandas`."
) from e
if not isinstance(data_frame, pd.DataFrame):
raise ValueError(
f"Expected data_frame to be a pd.DataFrame, got {type(data_frame)}"
)
super().__init__(data_frame, page_content_column=page_content_column)

View File

@ -0,0 +1,32 @@
from typing import Any, Iterator
from langchain.docstore.document import Document
from langchain.document_loaders.dataframe import BaseDataFrameLoader
class PolarsDataFrameLoader(BaseDataFrameLoader):
"""Load `Polars` DataFrame."""
def __init__(self, data_frame: Any, *, page_content_column: str = "text"):
"""Initialize with dataframe object.
Args:
data_frame: Polars DataFrame object.
page_content_column: Name of the column containing the page content.
Defaults to "text".
"""
import polars as pl
if not isinstance(data_frame, pl.DataFrame):
raise ValueError(
f"Expected data_frame to be a pl.DataFrame, got {type(data_frame)}"
)
super().__init__(data_frame, page_content_column=page_content_column)
def lazy_load(self) -> Iterator[Document]:
"""Lazy load records from dataframe."""
for row in self.data_frame.iter_rows(named=True):
text = row[self.page_content_column]
row.pop(self.page_content_column)
yield Document(page_content=text, metadata=row)
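
A brief sketch of how the new loader might be used (the DataFrame contents below are made up):

```python
# Sketch only: loading documents from a Polars DataFrame.
import polars as pl

from langchain.document_loaders import PolarsDataFrameLoader

df = pl.DataFrame(
    {
        "text": ["First document.", "Second document."],
        "source": ["a.txt", "b.txt"],
    }
)
loader = PolarsDataFrameLoader(df, page_content_column="text")
docs = loader.load()  # remaining columns (here: "source") become metadata
```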

View File

@ -1,10 +1,9 @@
from typing import Any, Iterator, List
from typing import Any
from langchain.docstore.document import Document
from langchain.document_loaders.base import BaseLoader
from langchain.document_loaders.dataframe import BaseDataFrameLoader
class XorbitsLoader(BaseLoader):
class XorbitsLoader(BaseDataFrameLoader):
"""Load `Xorbits` DataFrame."""
def __init__(self, data_frame: Any, page_content_column: str = "text"):
@ -30,17 +29,4 @@ class XorbitsLoader(BaseLoader):
f"Expected data_frame to be a xorbits.pandas.DataFrame, \
got {type(data_frame)}"
)
self.data_frame = data_frame
self.page_content_column = page_content_column
def lazy_load(self) -> Iterator[Document]:
"""Lazy load records from dataframe."""
for _, row in self.data_frame.iterrows():
text = row[self.page_content_column]
metadata = row.to_dict()
metadata.pop(self.page_content_column)
yield Document(page_content=text, metadata=metadata)
def load(self) -> List[Document]:
"""Load full dataframe."""
return list(self.lazy_load())
super().__init__(data_frame, page_content_column=page_content_column)

View File

@ -352,9 +352,9 @@ class OpenAIEmbeddings(BaseModel, Embeddings):
if self.show_progress_bar:
try:
import tqdm
from tqdm.auto import tqdm
_iter = tqdm.tqdm(range(0, len(tokens), _chunk_size))
_iter = tqdm(range(0, len(tokens), _chunk_size))
except ImportError:
_iter = range(0, len(tokens), _chunk_size)
else:

View File

@ -66,6 +66,10 @@ class ArangoGraph:
col_type: str = collection["type"]
col_size: int = self.db.collection(col_name).count()
# Skip collection if empty
if col_size == 0:
continue
# Set number of ArangoDB documents/edges to retrieve
limit_amount = ceil(sample_ratio * col_size) or 1

View File

@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Any, Optional
from langchain.load.dump import dumps
from langchain.load.load import loads
from langchain.utils import get_from_env
if TYPE_CHECKING:
from langchainhub import Client
@ -20,9 +19,7 @@ def _get_client(api_url: Optional[str] = None, api_key: Optional[str] = None) ->
"langchainhub`."
) from e
api_url = api_url or get_from_env("api_url", "LANGCHAIN_HUB_API_URL")
api_key = api_key or get_from_env("api_key", "LANGCHAIN_HUB_API_KEY", default="")
api_key = api_key or get_from_env("api_key", "LANGCHAIN_API_KEY")
# Client logic will also attempt to load URL/key from environment variables
return Client(api_url, api_key=api_key)
@ -33,14 +30,33 @@ def push(
api_url: Optional[str] = None,
api_key: Optional[str] = None,
parent_commit_hash: Optional[str] = "latest",
new_repo_is_public: bool = False,
new_repo_description: str = "",
) -> str:
"""
Pushes an object to the hub and returns the URL.
Pushes an object to the hub and returns the new commit hash.
:param repo_full_name: The full name of the repo to push to in the format of
`owner/repo`.
:param object: The LangChain to serialize and push to the hub.
:param api_url: The URL of the LangChain Hub API. Defaults to the hosted API service
if you have an api key set, or a localhost instance if not.
:param api_key: The API key to use to authenticate with the LangChain Hub API.
:param parent_commit_hash: The commit hash of the parent commit to push to. Defaults
to the latest commit automatically.
:param new_repo_is_public: Whether the repo should be public. Defaults to
False (Private by default).
:param new_repo_description: The description of the repo. Defaults to an empty
string.
"""
client = _get_client(api_url=api_url, api_key=api_key)
manifest_json = dumps(object)
resp = client.push(
repo_full_name, manifest_json, parent_commit_hash=parent_commit_hash
repo_full_name,
manifest_json,
parent_commit_hash=parent_commit_hash,
new_repo_is_public=new_repo_is_public,
new_repo_description=new_repo_description,
)
commit_hash: str = resp["commit"]["commit_hash"]
return commit_hash
@ -53,7 +69,13 @@ def pull(
api_key: Optional[str] = None,
) -> Any:
"""
Pulls an object from the hub and returns it.
Pulls an object from the hub and returns it as a LangChain object.
:param owner_repo_commit: The full name of the repo to pull from in the format of
`owner/repo:commit_hash`.
:param api_url: The URL of the LangChain Hub API. Defaults to the hosted API service
if you have an api key set, or a localhost instance if not.
:param api_key: The API key to use to authenticate with the LangChain Hub API.
"""
client = _get_client(api_url=api_url, api_key=api_key)
resp: str = client.pull(owner_repo_commit)
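
A sketch of the documented push/pull flow (repo name and prompt are placeholders; the `langchainhub` package and a `LANGCHAIN_API_KEY` are assumed):

```python
# Sketch only: pushing and pulling a prompt through the LangChain Hub helpers.
from langchain import hub
from langchain.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template("Tell me a joke about {topic}")  # placeholder prompt

commit_hash = hub.push(
    "my-handle/jokes",            # placeholder repo
    prompt,
    new_repo_is_public=False,     # argument added in this diff
    new_repo_description="Demo",  # argument added in this diff
)
pulled = hub.pull("my-handle/jokes")
```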

View File

@ -528,9 +528,13 @@ class BaseLLM(BaseLanguageModel[str], ABC):
f" argument of type {type(prompts)}."
)
# Create callback managers
if isinstance(callbacks, list) and (
isinstance(callbacks[0], (list, BaseCallbackManager))
or callbacks[0] is None
if (
isinstance(callbacks, list)
and callbacks
and (
isinstance(callbacks[0], (list, BaseCallbackManager))
or callbacks[0] is None
)
):
# We've received a list of callbacks args to apply to each input
assert len(callbacks) == len(prompts)

View File

@ -59,6 +59,9 @@ class VLLM(BaseLLM):
logprobs: Optional[int] = None
"""Number of log probabilities to return per output token."""
dtype: str = "auto"
"""The data type for the model weights and activations."""
client: Any #: :meta private:
@root_validator()
@ -77,6 +80,7 @@ class VLLM(BaseLLM):
model=values["model"],
tensor_parallel_size=values["tensor_parallel_size"],
trust_remote_code=values["trust_remote_code"],
dtype=values["dtype"],
)
return values

View File

@ -1,6 +1,6 @@
from typing import List
from langchain.pydantic_v1 import BaseModel
from langchain.pydantic_v1 import BaseModel, Field
from langchain.schema import (
BaseChatMessageHistory,
)
@ -13,7 +13,7 @@ class ChatMessageHistory(BaseChatMessageHistory, BaseModel):
Stores messages in an in memory list.
"""
messages: List[BaseMessage] = []
messages: List[BaseMessage] = Field(default_factory=list)
def add_message(self, message: BaseMessage) -> None:
"""Add a self-created message to the store"""

View File

@ -69,6 +69,13 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
when making API calls. If not provided, credentials will be ascertained from
the environment."""
# TODO: Add extra data type handling for type website
engine_data_type: int = Field(default=0, ge=0, le=1)
""" Defines the enterprise search data type
0 - Unstructured data
1 - Structured data
"""
_client: SearchServiceClient
_serving_config: str
@ -86,10 +93,18 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
from google.cloud import discoveryengine_v1beta # noqa: F401
except ImportError as exc:
raise ImportError(
"google.cloud.discoveryengine is not installed. "
"google.cloud.discoveryengine is not installed."
"Please install it with pip install google-cloud-discoveryengine"
) from exc
try:
from google.api_core.exceptions import InvalidArgument # noqa: F401
except ImportError as exc:
raise ImportError(
"google.api_core.exceptions is not installed. "
"Please install it with pip install google-api-core"
) from exc
values["project_id"] = get_from_dict_or_env(values, "project_id", "PROJECT_ID")
values["search_engine_id"] = get_from_dict_or_env(
values, "search_engine_id", "SEARCH_ENGINE_ID"
@ -110,7 +125,7 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
serving_config=self.serving_config_id,
)
def _convert_search_response(
def _convert_unstructured_search_response(
self, results: Sequence[SearchResult]
) -> List[Document]:
"""Converts a sequence of search results to a list of LangChain documents."""
@ -149,6 +164,30 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
return documents
def _convert_structured_search_response(
self, results: Sequence[SearchResult]
) -> List[Document]:
"""Converts a sequence of search results to a list of LangChain documents."""
import json
from google.protobuf.json_format import MessageToDict
documents: List[Document] = []
for result in results:
document_dict = MessageToDict(
result.document._pb, preserving_proto_field_name=True
)
documents.append(
Document(
page_content=json.dumps(document_dict.get("struct_data", {})),
metadata={"id": document_dict["id"], "name": document_dict["name"]},
)
)
return documents
def _create_search_request(self, query: str) -> SearchRequest:
"""Prepares a SearchRequest object."""
from google.cloud.discoveryengine_v1beta import SearchRequest
@ -161,23 +200,32 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
mode=self.spell_correction_mode
)
if self.get_extractive_answers:
extractive_content_spec = (
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
max_extractive_answer_count=self.max_extractive_answer_count,
if self.engine_data_type == 0:
if self.get_extractive_answers:
extractive_content_spec = (
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
max_extractive_answer_count=self.max_extractive_answer_count,
)
)
else:
extractive_content_spec = (
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
max_extractive_segment_count=self.max_extractive_segment_count,
)
)
content_search_spec = SearchRequest.ContentSearchSpec(
extractive_content_spec=extractive_content_spec
)
elif self.engine_data_type == 1:
content_search_spec = None
else:
extractive_content_spec = (
SearchRequest.ContentSearchSpec.ExtractiveContentSpec(
max_extractive_segment_count=self.max_extractive_segment_count,
)
# TODO: Add extra data type handling for type website
raise NotImplementedError(
"Only engine data type 0 (Unstructured) or 1 (Structured)"
+ " are supported currently."
+ f" Got {self.engine_data_type}"
)
content_search_spec = SearchRequest.ContentSearchSpec(
extractive_content_spec=extractive_content_spec,
)
return SearchRequest(
query=query,
filter=self.filter,
@ -192,8 +240,27 @@ class GoogleCloudEnterpriseSearchRetriever(BaseRetriever):
self, query: str, *, run_manager: CallbackManagerForRetrieverRun
) -> List[Document]:
"""Get documents relevant for a query."""
from google.api_core.exceptions import InvalidArgument
search_request = self._create_search_request(query)
response = self._client.search(search_request)
documents = self._convert_search_response(response.results)
try:
response = self._client.search(search_request)
except InvalidArgument as e:
raise type(e)(
e.message + " This might be due to engine_data_type not set correctly."
)
if self.engine_data_type == 0:
documents = self._convert_unstructured_search_response(response.results)
elif self.engine_data_type == 1:
documents = self._convert_structured_search_response(response.results)
else:
# TODO: Add extra data type handling for type website
raise NotImplementedError(
"Only engine data type 0 (Unstructured) or 1 (Structured)"
+ " are supported currently."
+ f" Got {self.engine_data_type}"
)
return documents
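
A usage sketch for the new structured-data mode (project and engine IDs are placeholders):

```python
# Sketch only: querying a structured Enterprise Search data store.
from langchain.retrievers import GoogleCloudEnterpriseSearchRetriever

retriever = GoogleCloudEnterpriseSearchRetriever(
    project_id="my-gcp-project",      # placeholder
    search_engine_id="my-engine-id",  # placeholder
    engine_data_type=1,               # 1 = structured data (added in this diff)
)
docs = retriever.get_relevant_documents("annual revenue 2022")
```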

View File

@ -97,7 +97,7 @@ class ParentDocumentRetriever(BaseRetriever):
def add_documents(
self,
documents: List[Document],
ids: Optional[List[str]],
ids: Optional[List[str]] = None,
add_to_docstore: bool = True,
) -> None:
"""Adds documents to the docstore and vectorstores.

View File

@ -17,6 +17,11 @@ tool for the job.
CallbackManagerForToolRun, AsyncCallbackManagerForToolRun
"""
from langchain.tools.ainetwork.app import AINAppOps
from langchain.tools.ainetwork.owner import AINOwnerOps
from langchain.tools.ainetwork.rule import AINRuleOps
from langchain.tools.ainetwork.transfer import AINTransfer
from langchain.tools.ainetwork.value import AINValueOps
from langchain.tools.arxiv.tool import ArxivQueryRun
from langchain.tools.azure_cognitive_services import (
AzureCogsFormRecognizerTool,
@ -118,6 +123,11 @@ from langchain.tools.youtube.search import YouTubeSearchTool
from langchain.tools.zapier.tool import ZapierNLAListActions, ZapierNLARunAction
__all__ = [
"AINAppOps",
"AINOwnerOps",
"AINRuleOps",
"AINTransfer",
"AINValueOps",
"AIPluginTool",
"APIOperation",
"ArxivQueryRun",

View File

@ -0,0 +1,95 @@
import builtins
import json
from enum import Enum
from typing import List, Optional, Type, Union
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools.ainetwork.base import AINBaseTool
class AppOperationType(str, Enum):
SET_ADMIN = "SET_ADMIN"
GET_CONFIG = "GET_CONFIG"
class AppSchema(BaseModel):
type: AppOperationType = Field(...)
appName: str = Field(..., description="Name of the application on the blockchain")
address: Optional[Union[str, List[str]]] = Field(
None,
description=(
"A single address or a list of addresses. Default: current session's "
"address"
),
)
class AINAppOps(AINBaseTool):
name: str = "AINappOps"
description: str = """
Create an app in the AINetwork Blockchain database by creating the /apps/<appName> path.
An address set as `admin` can grant `owner` rights to other addresses (refer to `AINownerOps` for more details).
Also, `admin` is initialized to have all `owner` permissions and `rule` allowed for that path.
## appName Rule
- [a-z_0-9]+
## address Rules
- 0x[0-9a-fA-F]{40}
- Defaults to the current session's address
- Multiple addresses can be specified if needed
## SET_ADMIN Example 1
- type: SET_ADMIN
- appName: ain_project
### Result:
1. Path /apps/ain_project created.
2. Current session's address registered as admin.
## SET_ADMIN Example 2
- type: SET_ADMIN
- appName: test_project
- address: [<address1>, <address2>]
### Result:
1. Path /apps/test_project created.
2. <address1> and <address2> registered as admin.
""" # noqa: E501
args_schema: Type[BaseModel] = AppSchema
async def _arun(
self,
type: AppOperationType,
appName: str,
address: Optional[Union[str, List[str]]] = None,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
) -> str:
from ain.types import ValueOnlyTransactionInput
from ain.utils import getTimestamp
try:
if type is AppOperationType.SET_ADMIN:
if address is None:
address = self.interface.wallet.defaultAccount.address
if isinstance(address, str):
address = [address]
res = await self.interface.db.ref(
f"/manage_app/{appName}/create/{getTimestamp()}"
).setValue(
transactionInput=ValueOnlyTransactionInput(
value={"admin": {address: True for address in address}}
)
)
elif type is AppOperationType.GET_CONFIG:
res = await self.interface.db.ref(
f"/manage_app/{appName}/config"
).getValue()
else:
raise ValueError(f"Unsupported 'type': {type}.")
return json.dumps(res, ensure_ascii=False)
except Exception as e:
return f"{builtins.type(e).__name__}: {str(e)}"

View File

@ -0,0 +1,71 @@
"""Base class for AINetwork tools."""
from __future__ import annotations
import asyncio
import threading
from enum import Enum
from typing import TYPE_CHECKING, Any, Optional
from langchain.callbacks.manager import CallbackManagerForToolRun
from langchain.pydantic_v1 import Field
from langchain.tools.ainetwork.utils import authenticate
from langchain.tools.base import BaseTool
if TYPE_CHECKING:
from ain.ain import Ain
class OperationType(str, Enum):
SET = "SET"
GET = "GET"
class AINBaseTool(BaseTool):
"""Base class for the AINetwork tools."""
interface: Ain = Field(default_factory=authenticate)
"""The interface object for the AINetwork Blockchain."""
def _run(
self,
*args: Any,
run_manager: Optional[CallbackManagerForToolRun] = None,
**kwargs: Any,
) -> str:
try:
loop = asyncio.get_event_loop()
except RuntimeError:
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
if loop.is_closed():
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
if loop.is_running():
result_container = []
def thread_target() -> None:
nonlocal result_container
new_loop = asyncio.new_event_loop()
asyncio.set_event_loop(new_loop)
try:
result_container.append(
new_loop.run_until_complete(self._arun(*args, **kwargs))
)
except Exception as e:
result_container.append(e)
finally:
new_loop.close()
thread = threading.Thread(target=thread_target)
thread.start()
thread.join()
result = result_container[0]
if isinstance(result, Exception):
raise result
return result
else:
result = loop.run_until_complete(self._arun(*args, **kwargs))
loop.close()
return result

View File

@ -0,0 +1,110 @@
import builtins
import json
from typing import List, Optional, Type, Union
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools.ainetwork.base import AINBaseTool, OperationType
class RuleSchema(BaseModel):
type: OperationType = Field(...)
path: str = Field(..., description="Blockchain reference path")
address: Optional[Union[str, List[str]]] = Field(
None, description="A single address or a list of addresses"
)
write_owner: Optional[bool] = Field(
False, description="Authority to edit the `owner` property of the path"
)
write_rule: Optional[bool] = Field(
False, description="Authority to edit `write rule` for the path"
)
write_function: Optional[bool] = Field(
False, description="Authority to `set function` for the path"
)
branch_owner: Optional[bool] = Field(
False, description="Authority to initialize `owner` of sub-paths"
)
class AINOwnerOps(AINBaseTool):
name: str = "AINownerOps"
description: str = """
Rules for `owner` in AINetwork Blockchain database.
An address set as `owner` can modify permissions according to its granted authorities
## Path Rule
- (/[a-zA-Z_0-9]+)+
- Permission checks ascend from the most specific (child) path to broader (parent) paths until an `owner` is located.
## Address Rules
- 0x[0-9a-fA-F]{40}: 40-digit hexadecimal address
- *: All addresses permitted
- Defaults to the current session's address
## SET
- `SET` alters permissions for specific addresses, while other addresses remain unaffected.
- When removing an address of `owner`, set all authorities for that address to false.
- message `write_owner permission evaluated false` if fail
### Example
- type: SET
- path: /apps/langchain
- address: [<address 1>, <address 2>]
- write_owner: True
- write_rule: True
- write_function: True
- branch_owner: True
## GET
- Provides all addresses with `owner` permissions and their authorities in the path.
### Example
- type: GET
- path: /apps/langchain
""" # noqa: E501
args_schema: Type[BaseModel] = RuleSchema
async def _arun(
self,
type: OperationType,
path: str,
address: Optional[Union[str, List[str]]] = None,
write_owner: Optional[bool] = None,
write_rule: Optional[bool] = None,
write_function: Optional[bool] = None,
branch_owner: Optional[bool] = None,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
) -> str:
from ain.types import ValueOnlyTransactionInput
try:
if type is OperationType.SET:
if address is None:
address = self.interface.wallet.defaultAccount.address
if isinstance(address, str):
address = [address]
res = await self.interface.db.ref(path).setOwner(
transactionInput=ValueOnlyTransactionInput(
value={
".owner": {
"owners": {
address: {
"write_owner": write_owner or False,
"write_rule": write_rule or False,
"write_function": write_function or False,
"branch_owner": branch_owner or False,
}
for address in address
}
}
}
)
)
elif type is OperationType.GET:
res = await self.interface.db.ref(path).getOwner()
else:
raise ValueError(f"Unsupported 'type': {type}.")
return json.dumps(res, ensure_ascii=False)
except Exception as e:
return f"{builtins.type(e).__name__}: {str(e)}"

View File

@ -0,0 +1,77 @@
import builtins
import json
from typing import Optional, Type
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools.ainetwork.base import AINBaseTool, OperationType
class RuleSchema(BaseModel):
type: OperationType = Field(...)
path: str = Field(..., description="Path on the blockchain where the rule applies")
eval: Optional[str] = Field(None, description="eval string to determine permission")
class AINRuleOps(AINBaseTool):
name: str = "AINruleOps"
description: str = """
Covers the write `rule` for the AINetwork Blockchain database. The SET type specifies write permissions using the `eval` variable as a JavaScript eval string.
In order to use AINvalueOps with SET at the path, the execution result of the `eval` string must be true.
## Path Rules
1. Allowed characters for directory: `[a-zA-Z_0-9]`
2. Use `$<key>` for template variables as directory.
## Eval String Special Variables
- auth.addr: Address of the writer for the path
- newData: New data for the path
- data: Current data for the path
- currentTime: Time in seconds
- lastBlockNumber: Latest processed block number
## Eval String Functions
- getValue(<path>)
- getRule(<path>)
- getOwner(<path>)
- getFunction(<path>)
- evalRule(<path>, <value to set>, auth, currentTime)
- evalOwner(<path>, 'write_owner', auth)
## SET Example
- type: SET
- path: /apps/langchain_project_1/$from/$to/$img
- eval: auth.addr===$from&&!getValue('/apps/image_db/'+$img)
## GET Example
- type: GET
- path: /apps/langchain_project_1
""" # noqa: E501
args_schema: Type[BaseModel] = RuleSchema
async def _arun(
self,
type: OperationType,
path: str,
eval: Optional[str] = None,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
) -> str:
from ain.types import ValueOnlyTransactionInput
try:
if type is OperationType.SET:
if eval is None:
raise ValueError("'eval' is required for SET operation.")
res = await self.interface.db.ref(path).setRule(
transactionInput=ValueOnlyTransactionInput(
value={".rule": {"write": eval}}
)
)
elif type is OperationType.GET:
res = await self.interface.db.ref(path).getRule()
else:
raise ValueError(f"Unsupported 'type': {type}.")
return json.dumps(res, ensure_ascii=False)
except Exception as e:
return f"{builtins.type(e).__name__}: {str(e)}"

View File

@ -0,0 +1,29 @@
import json
from typing import Optional, Type
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools.ainetwork.base import AINBaseTool
class TransferSchema(BaseModel):
address: str = Field(..., description="Address to transfer AIN to")
amount: int = Field(..., description="Amount of AIN to transfer")
class AINTransfer(AINBaseTool):
name: str = "AINtransfer"
description: str = "Transfers AIN to a specified address"
args_schema: Type[TransferSchema] = TransferSchema
async def _arun(
self,
address: str,
amount: int,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
) -> str:
try:
res = await self.interface.wallet.transfer(address, amount, nonce=-1)
return json.dumps(res, ensure_ascii=False)
except Exception as e:
return f"{type(e).__name__}: {str(e)}"

View File

@ -0,0 +1,62 @@
"""AINetwork Blockchain tool utils."""
from __future__ import annotations
import os
from typing import TYPE_CHECKING, Literal, Optional
if TYPE_CHECKING:
from ain.ain import Ain
def authenticate(network: Optional[Literal["mainnet", "testnet"]] = "testnet") -> Ain:
"""Authenticate using the AIN Blockchain"""
try:
from ain.ain import Ain
except ImportError as e:
raise ImportError(
"Cannot import ain-py related modules. Please install the package with "
"`pip install ain-py`."
) from e
if network == "mainnet":
provider_url = "https://mainnet-api.ainetwork.ai/"
chain_id = 1
if "AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY" in os.environ:
private_key = os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"]
else:
raise EnvironmentError(
"Error: The AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY environmental variable "
"has not been set."
)
elif network == "testnet":
provider_url = "https://testnet-api.ainetwork.ai/"
chain_id = 0
if "AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY" in os.environ:
private_key = os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"]
else:
raise EnvironmentError(
"Error: The AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY environmental variable "
"has not been set."
)
elif network is None:
if (
"AIN_BLOCKCHAIN_PROVIDER_URL" in os.environ
and "AIN_BLOCKCHAIN_CHAIN_ID" in os.environ
and "AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY" in os.environ
):
provider_url = os.environ["AIN_BLOCKCHAIN_PROVIDER_URL"]
chain_id = int(os.environ["AIN_BLOCKCHAIN_CHAIN_ID"])
private_key = os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"]
else:
raise EnvironmentError(
"Error: The AIN_BLOCKCHAIN_PROVIDER_URL and "
"AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY and AIN_BLOCKCHAIN_CHAIN_ID "
"environmental variable has not been set."
)
else:
raise ValueError(f"Unsupported 'network': {network}")
ain = Ain(provider_url, chain_id)
ain.wallet.addAndSetDefaultAccount(private_key)
return ain
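
A short sketch of the authentication helper in isolation (the private key is a placeholder; on testnet the account can then be funded from the faucet as in the notebook above):

```python
# Sketch only: building an authenticated Ain client for the testnet.
import os

from langchain.tools.ainetwork.utils import authenticate

os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"] = "<hex-private-key>"  # placeholder
ain = authenticate(network="testnet")
print(ain.wallet.defaultAccount.address)
```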

View File

@ -0,0 +1,80 @@
import builtins
import json
from typing import Optional, Type, Union
from langchain.callbacks.manager import AsyncCallbackManagerForToolRun
from langchain.pydantic_v1 import BaseModel, Field
from langchain.tools.ainetwork.base import AINBaseTool, OperationType
class ValueSchema(BaseModel):
type: OperationType = Field(...)
path: str = Field(..., description="Blockchain reference path")
value: Optional[Union[int, str, float, dict]] = Field(
None, description="Value to be set at the path"
)
class AINValueOps(AINBaseTool):
name: str = "AINvalueOps"
description: str = """
Covers the read and write value for the AINetwork Blockchain database.
## SET
- Set a value at a given path
### Example
- type: SET
- path: /apps/langchain_test_1/object
- value: {1: 2, "34": 56}
## GET
- Retrieve a value at a given path
### Example
- type: GET
- path: /apps/langchain_test_1/DB
## Special paths
- `/accounts/<address>/balance`: Account balance
- `/accounts/<address>/nonce`: Account nonce
- `/apps`: Applications
- `/consensus`: Consensus
- `/checkin`: Check-in
- `/deposit/<service id>/<address>/<deposit id>`: Deposit
- `/deposit_accounts/<service id>/<address>/<account id>`: Deposit accounts
- `/escrow`: Escrow
- `/payments`: Payment
- `/sharding`: Sharding
- `/token/name`: Token name
- `/token/symbol`: Token symbol
- `/token/total_supply`: Token total supply
- `/transfer/<address from>/<address to>/<key>/value`: Transfer
- `/withdraw/<service id>/<address>/<withdraw id>`: Withdraw
"""
args_schema: Type[BaseModel] = ValueSchema
async def _arun(
self,
type: OperationType,
path: str,
value: Optional[Union[int, str, float, dict]] = None,
run_manager: Optional[AsyncCallbackManagerForToolRun] = None,
) -> str:
from ain.types import ValueOnlyTransactionInput
try:
if type is OperationType.SET:
if value is None:
raise ValueError("'value' is required for SET operation.")
res = await self.interface.db.ref(path).setValue(
transactionInput=ValueOnlyTransactionInput(value=value)
)
elif type is OperationType.GET:
res = await self.interface.db.ref(path).getValue()
else:
raise ValueError(f"Unsupported 'type': {type}.")
return json.dumps(res, ensure_ascii=False)
except Exception as e:
return f"{builtins.type(e).__name__}: {str(e)}"

View File

@ -6,13 +6,13 @@ import re
import sys
from contextlib import redirect_stdout
from io import StringIO
from typing import Any, Dict, Optional
from typing import Any, Dict, Optional, Type
from langchain.callbacks.manager import (
AsyncCallbackManagerForToolRun,
CallbackManagerForToolRun,
)
from langchain.pydantic_v1 import Field, root_validator
from langchain.pydantic_v1 import BaseModel, Field, root_validator
from langchain.tools.base import BaseTool
from langchain.utilities import PythonREPL
@ -77,6 +77,10 @@ class PythonREPLTool(BaseTool):
return result
class PythonInputs(BaseModel):
query: str = Field(description="code snippet to run")
class PythonAstREPLTool(BaseTool):
"""A tool for running python code in a REPL."""
@ -90,6 +94,7 @@ class PythonAstREPLTool(BaseTool):
globals: Optional[Dict] = Field(default_factory=dict)
locals: Optional[Dict] = Field(default_factory=dict)
sanitize_input: bool = True
args_schema: Type[BaseModel] = PythonInputs
@root_validator(pre=True)
def validate_python_version(cls, values: Dict) -> Dict:
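
A short sketch of what the new `args_schema` buys: structured input to the tool is a single `query` field that is validated before `_run` is called. The import path below is an assumption about where this class lives; the snippet itself is not part of the diff.

```python
from langchain.tools.python.tool import PythonAstREPLTool

# With PythonInputs as the args schema, a dict input with a "query" key is
# validated and unpacked into the query argument of _run.
tool = PythonAstREPLTool()
print(tool.run({"query": "print(21 * 2)"}))  # expected to print roughly "42"
```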

View File

@ -1,10 +1,12 @@
from collections import deque
from itertools import islice
from typing import (
Any,
ContextManager,
Deque,
Generator,
Generic,
Iterable,
Iterator,
List,
Optional,
@ -161,3 +163,13 @@ class Tee(Generic[T]):
# Why this is needed https://stackoverflow.com/a/44638570
safetee = Tee
def batch_iterate(size: int, iterable: Iterable[T]) -> Iterator[List[T]]:
"""Utility batching function."""
it = iter(iterable)
while True:
chunk = list(islice(it, size))
if not chunk:
return
yield chunk
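
A quick usage sketch; it mirrors the unit test added later in this changeset.

```python
from langchain.utils.iter import batch_iterate

# Chunks of at most `size` items are yielded until the iterable is exhausted.
for chunk in batch_iterate(2, [1, 2, 3, 4, 5]):
    print(chunk)  # [1, 2] then [3, 4] then [5]
```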

View File

@ -21,7 +21,7 @@ def _create_client(
try:
import meilisearch
except ImportError:
raise ValueError(
raise ImportError(
"Could not import meilisearch python package. "
"Please install it with `pip install meilisearch`."
)

View File

@ -52,6 +52,9 @@ class Milvus(VectorStore):
default of index.
drop_old (Optional[bool]): Whether to drop the current collection. Defaults
to False.
primary_field (str): Name of the primary key field. Defaults to "pk".
text_field (str): Name of the text field. Defaults to "text".
vector_field (str): Name of the vector field. Defaults to "vector".
The connection args used for this class come in the form of a dict,
here are a few of the options:
@ -107,6 +110,10 @@ class Milvus(VectorStore):
index_params: Optional[dict] = None,
search_params: Optional[dict] = None,
drop_old: Optional[bool] = False,
*,
primary_field: str = "pk",
text_field: str = "text",
vector_field: str = "vector",
):
"""Initialize the Milvus vector store."""
try:
@ -138,11 +145,11 @@ class Milvus(VectorStore):
self.consistency_level = consistency_level
# In order for a collection to be compatible, pk needs to be auto-id and int
self._primary_field = "pk"
# In order for compatibility, the text field will need to be called "text"
self._text_field = "text"
self._primary_field = primary_field
# In order for compatibility, the text field will need to be called "text"
self._text_field = text_field
# In order for compatibility, the vector field needs to be called "vector"
self._vector_field = "vector"
self._vector_field = vector_field
self.fields: list[str] = []
# Create the connection to the server
if connection_args is None:
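
An illustrative instantiation using the new keyword-only field-name overrides; the embedding model, collection name, and connection details are placeholders rather than values from this diff.

```python
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import Milvus

# Map the store onto an existing collection whose fields use custom names.
store = Milvus(
    embedding_function=OpenAIEmbeddings(),
    collection_name="my_documents",
    connection_args={"host": "127.0.0.1", "port": "19530"},
    primary_field="doc_pk",
    text_field="content",
    vector_field="embedding",
)
```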

File diff suppressed because it is too large

View File

@ -1,6 +1,6 @@
[tool.poetry]
name = "langchain"
version = "0.0.270"
version = "0.0.271"
description = "Building applications with LLMs through composability"
authors = []
license = "MIT"
@ -125,6 +125,8 @@ newspaper3k = {version = "^0.2.8", optional = true}
amazon-textract-caller = {version = "<2", optional = true}
xata = {version = "^1.0.0a7", optional = true}
xmltodict = {version = "^0.13.0", optional = true}
google-api-core = {version = "^2.11.1", optional = true}
markdownify = {version = "^0.11.6", optional = true}
[tool.poetry.group.test.dependencies]
@ -337,6 +339,7 @@ extended_testing = [
"xmltodict",
"faiss-cpu",
"openapi-schema-pydantic",
"markdownify",
]
[tool.ruff]

View File

@ -0,0 +1,175 @@
import asyncio
import os
import time
import urllib.request
import uuid
from enum import Enum
from typing import Any
from urllib.error import HTTPError
import pytest
from langchain.agents import AgentType, initialize_agent
from langchain.agents.agent_toolkits.ainetwork.toolkit import AINetworkToolkit
from langchain.chat_models import ChatOpenAI
from langchain.tools.ainetwork.utils import authenticate
class Match(Enum):
__test__ = False
ListWildcard = 1
StrWildcard = 2
DictWildcard = 3
IntWildcard = 4
FloatWildcard = 5
ObjectWildcard = 6
@classmethod
def match(cls, value: Any, template: Any) -> bool:
if template is cls.ListWildcard:
return isinstance(value, list)
elif template is cls.StrWildcard:
return isinstance(value, str)
elif template is cls.DictWildcard:
return isinstance(value, dict)
elif template is cls.IntWildcard:
return isinstance(value, int)
elif template is cls.FloatWildcard:
return isinstance(value, float)
elif template is cls.ObjectWildcard:
return True
elif type(value) != type(template):
return False
elif isinstance(value, dict):
if len(value) != len(template):
return False
for k, v in value.items():
if k not in template or not cls.match(v, template[k]):
return False
return True
elif isinstance(value, list):
if len(value) != len(template):
return False
for i in range(len(value)):
if not cls.match(value[i], template[i]):
return False
return True
else:
return value == template
@pytest.mark.requires("ain")
def test_ainetwork_toolkit() -> None:
def get(path: str, type: str = "value", default: Any = None) -> Any:
ref = ain.db.ref(path)
value = asyncio.run(
{
"value": ref.getValue,
"rule": ref.getRule,
"owner": ref.getOwner,
}[type]()
)
return default if value is None else value
def validate(path: str, template: Any, type: str = "value") -> bool:
value = get(path, type)
return Match.match(value, template)
if not os.environ.get("AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY", None):
from ain.account import Account
account = Account.create()
os.environ["AIN_BLOCKCHAIN_ACCOUNT_PRIVATE_KEY"] = account.private_key
interface = authenticate(network="testnet")
toolkit = AINetworkToolkit(network="testnet", interface=interface)
llm = ChatOpenAI(model="gpt-4", temperature=0)
agent = initialize_agent(
tools=toolkit.get_tools(),
llm=llm,
verbose=True,
agent=AgentType.OPENAI_FUNCTIONS,
)
ain = interface
self_address = ain.wallet.defaultAccount.address
co_address = "0x6813Eb9362372EEF6200f3b1dbC3f819671cBA69"
# Test creating an app
UUID = uuid.UUID(
int=(int(time.time() * 1000) << 64) | (uuid.uuid4().int & ((1 << 64) - 1))
)
app_name = f"_langchain_test__{str(UUID).replace('-', '_')}"
agent.run(f"""Create app {app_name}""")
validate(f"/manage_app/{app_name}/config", {"admin": {self_address: True}})
validate(f"/apps/{app_name}/DB", None, "owner")
# Test reading owner config
agent.run(f"""Read owner config of /apps/{app_name}/DB .""")
assert ...
# Test granting owner config
agent.run(
f"""Grant owner authority to {co_address} for edit write rule permission of /apps/{app_name}/DB_co .""" # noqa: E501
)
validate(
f"/apps/{app_name}/DB_co",
{
".owner": {
"owners": {
co_address: {
"branch_owner": False,
"write_function": False,
"write_owner": False,
"write_rule": True,
}
}
}
},
"owner",
)
# Test reading owner config
agent.run(f"""Read owner config of /apps/{app_name}/DB_co .""")
assert ...
# Test reading owner config
agent.run(f"""Read owner config of /apps/{app_name}/DB .""")
assert ... # Check if owner {self_address} exists
# Test reading a value
agent.run(f"""Read value in /apps/{app_name}/DB""")
assert ... # empty
# Test writing a value
agent.run(f"""Write value {{1: 1904, 2: 43}} in /apps/{app_name}/DB""")
validate(f"/apps/{app_name}/DB", {1: 1904, 2: 43})
# Test reading a value
agent.run(f"""Read value in /apps/{app_name}/DB""")
assert ... # check value
# Test reading a rule
agent.run(f"""Read write rule of app {app_name} .""")
assert ... # check rule that self_address exists
# Test sending AIN
self_balance = get(f"/accounts/{self_address}/balance", default=0)
transaction_history = get(f"/transfer/{self_address}/{co_address}", default={})
if self_balance < 1:
try:
with urllib.request.urlopen(
f"http://faucet.ainetwork.ai/api/test/{self_address}/"
) as response:
try_test = response.getcode()
except HTTPError as e:
try_test = e.getcode()
else:
try_test = 200
if try_test == 200:
agent.run(f"""Send 1 AIN to {co_address}""")
transaction_update = get(f"/transfer/{self_address}/{co_address}", default={})
assert any(
transaction_update[key]["value"] == 1
for key in transaction_update.keys() - transaction_history.keys()
)

View File

@ -55,6 +55,21 @@ def test_connect_arangodb() -> None:
assert ["hello_world"] == sample_aql_result
def test_empty_schema_on_no_data() -> None:
"""Test that the schema is empty for an empty ArangoDB Database"""
db = get_arangodb_client()
db.delete_graph("GameOfThrones", drop_collections=True, ignore_missing=True)
db.delete_collection("empty_collection", ignore_missing=True)
db.create_collection("empty_collection")
graph = ArangoGraph(db)
assert graph.schema == {
"Graph Schema": [],
"Collection Schema": [],
}
def test_aql_generation() -> None:
"""Test that AQL statement is correctly generated and executed."""
db = get_arangodb_client()

View File

@ -1,6 +1,8 @@
"""Test ChatOpenAI wrapper."""
from typing import Any
import pytest
from langchain.callbacks.manager import CallbackManager
@ -89,6 +91,34 @@ def test_chat_openai_streaming() -> None:
assert isinstance(response, BaseMessage)
@pytest.mark.scheduled
def test_chat_openai_streaming_generation_info() -> None:
"""Test that generation info is preserved when streaming."""
class _FakeCallback(FakeCallbackHandler):
saved_things: dict = {}
def on_llm_end(
self,
*args: Any,
**kwargs: Any,
) -> Any:
# Save the generation
self.saved_things["generation"] = args[0]
callback = _FakeCallback()
callback_manager = CallbackManager([callback])
chat = ChatOpenAI(
max_tokens=2,
temperature=0,
callback_manager=callback_manager,
)
list(chat.stream("hi"))
generation = callback.saved_things["generation"]
# `Hello!` is two tokens; assert that this is what is returned
assert generation.generations[0][0].text == "Hello!"
def test_chat_openai_llm_output_contains_model_name() -> None:
"""Test llm_output contains model_name."""
chat = ChatOpenAI(max_tokens=10)

View File

@ -0,0 +1,48 @@
import polars as pl
import pytest
from langchain.document_loaders import PolarsDataFrameLoader
from langchain.schema import Document
@pytest.fixture
def sample_data_frame() -> pl.DataFrame:
data = {
"text": ["Hello", "World"],
"author": ["Alice", "Bob"],
"date": ["2022-01-01", "2022-01-02"],
}
return pl.DataFrame(data)
def test_load_returns_list_of_documents(sample_data_frame: pl.DataFrame) -> None:
loader = PolarsDataFrameLoader(sample_data_frame)
docs = loader.load()
assert isinstance(docs, list)
assert all(isinstance(doc, Document) for doc in docs)
assert len(docs) == 2
def test_load_converts_dataframe_columns_to_document_metadata(
sample_data_frame: pl.DataFrame,
) -> None:
loader = PolarsDataFrameLoader(sample_data_frame)
docs = loader.load()
for i, doc in enumerate(docs):
df: pl.DataFrame = sample_data_frame[i]
assert df is not None
assert doc.metadata["author"] == df.select("author").item()
assert doc.metadata["date"] == df.select("date").item()
def test_load_uses_page_content_column_to_create_document_text(
sample_data_frame: pl.DataFrame,
) -> None:
sample_data_frame = sample_data_frame.rename(mapping={"text": "dummy_test_column"})
loader = PolarsDataFrameLoader(
sample_data_frame, page_content_column="dummy_test_column"
)
docs = loader.load()
assert docs[0].page_content == "Hello"
assert docs[1].page_content == "World"
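
For context, a minimal sketch of how the loader under test is used; the frame contents are placeholders.

```python
import polars as pl

from langchain.document_loaders import PolarsDataFrameLoader

df = pl.DataFrame({"text": ["Hello", "World"], "author": ["Alice", "Bob"]})
docs = PolarsDataFrameLoader(df, page_content_column="text").load()
# Each row becomes a Document: page_content from "text", other columns as metadata.
```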

View File

@ -11,12 +11,15 @@ PROJECT_ID - set to your Google Cloud project ID
SEARCH_ENGINE_ID - the ID of the search engine to use for the test
"""
import pytest
from langchain.retrievers.google_cloud_enterprise_search import (
GoogleCloudEnterpriseSearchRetriever,
)
from langchain.schema import Document
@pytest.mark.requires("google_api_core")
def test_google_cloud_enterprise_search_get_relevant_documents() -> None:
"""Test the get_relevant_documents() method."""
retriever = GoogleCloudEnterpriseSearchRetriever()

View File

@ -6,7 +6,7 @@ import pytest
import requests
from langchain.docstore.document import Document
from langchain.document_loaders.confluence import ConfluenceLoader
from langchain.document_loaders.confluence import ConfluenceLoader, ContentFormat
@pytest.fixture
@ -152,6 +152,40 @@ class TestConfluenceLoader:
assert mock_confluence.cql.call_count == 0
assert mock_confluence.get_page_child_by_type.call_count == 0
def test_confluence_loader_when_content_format_and_keep_markdown_format_enabled(
self, mock_confluence: MagicMock
) -> None:
# one response with two pages
mock_confluence.get_all_pages_from_space.return_value = [
self._get_mock_page("123", ContentFormat.VIEW),
self._get_mock_page("456", ContentFormat.VIEW),
]
mock_confluence.get_all_restrictions_for_content.side_effect = [
self._get_mock_page_restrictions("123"),
self._get_mock_page_restrictions("456"),
]
confluence_loader = self._get_mock_confluence_loader(mock_confluence)
documents = confluence_loader.load(
space_key=self.MOCK_SPACE_KEY,
content_format=ContentFormat.VIEW,
keep_markdown_format=True,
max_pages=2,
)
assert mock_confluence.get_all_pages_from_space.call_count == 1
assert len(documents) == 2
assert all(isinstance(doc, Document) for doc in documents)
assert documents[0].page_content == "Content 123\n\n"
assert documents[1].page_content == "Content 456\n\n"
assert mock_confluence.get_page_by_id.call_count == 0
assert mock_confluence.get_all_pages_by_label.call_count == 0
assert mock_confluence.cql.call_count == 0
assert mock_confluence.get_page_child_by_type.call_count == 0
def _get_mock_confluence_loader(
self, mock_confluence: MagicMock
) -> ConfluenceLoader:
@ -163,11 +197,15 @@ class TestConfluenceLoader:
confluence_loader.confluence = mock_confluence
return confluence_loader
def _get_mock_page(self, page_id: str) -> Dict:
def _get_mock_page(
self, page_id: str, content_format: ContentFormat = ContentFormat.STORAGE
) -> Dict:
return {
"id": f"{page_id}",
"title": f"Page {page_id}",
"body": {"storage": {"value": f"<p>Content {page_id}</p>"}},
"body": {
f"{content_format.name.lower()}": {"value": f"<p>Content {page_id}</p>"}
},
"status": "current",
"type": "page",
"_links": {

View File

@ -23,7 +23,7 @@ def init_repo(tmpdir: py.path.local, dir_name: str) -> str:
git.add([sample_file])
git.commit(m="Initial commit")
return repo_dir
return str(repo_dir)
@pytest.mark.requires("git")

View File

@ -2,6 +2,11 @@
from langchain.tools import __all__ as public_api
_EXPECTED = [
"AINAppOps",
"AINOwnerOps",
"AINRuleOps",
"AINTransfer",
"AINValueOps",
"AIPluginTool",
"APIOperation",
"ArxivQueryRun",

View File

@ -0,0 +1,21 @@
from typing import List
import pytest
from langchain.utils.iter import batch_iterate
@pytest.mark.parametrize(
"input_size, input_iterable, expected_output",
[
(2, [1, 2, 3, 4, 5], [[1, 2], [3, 4], [5]]),
(3, [10, 20, 30, 40, 50], [[10, 20, 30], [40, 50]]),
(1, [100, 200, 300], [[100], [200], [300]]),
(4, [], []),
],
)
def test_batch_iterate(
input_size: int, input_iterable: List[int], expected_output: List[List[int]]
) -> None:
"""Test batching function."""
assert list(batch_iterate(input_size, input_iterable)) == expected_output

poetry.lock generated (758 changes)

File diff suppressed because it is too large