# langchain/templates/rag-opensearch/dummy_index_setup.py

import os

from openai import OpenAI
from opensearchpy import OpenSearch

# Runtime configuration, read once from the process environment.
# The OpenAI key has no fallback and is None when the variable is unset;
# every OpenSearch setting falls back to the default given here.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
OPENSEARCH_URL = os.environ.get("OPENSEARCH_URL", "https://localhost:9200")
OPENSEARCH_USERNAME = os.environ.get("OPENSEARCH_USERNAME", "admin")
OPENSEARCH_PASSWORD = os.environ.get("OPENSEARCH_PASSWORD", "admin")
OPENSEARCH_INDEX_NAME = os.environ.get("OPENSEARCH_INDEX_NAME", "langchain-test")

# Load the sample corpus: one document per non-blank line of dummy_data.txt
# (the path is resolved against the current working directory).
# The encoding is pinned so the result does not depend on the platform's
# locale default (assumes the data file is UTF-8 -- TODO confirm).
with open("dummy_data.txt", encoding="utf-8") as f:
    stripped_lines = (line.strip() for line in f)
    # Blank lines are dropped: an empty string has nothing to embed or index.
    docs = [text for text in stripped_lines if text]


# OpenAI client, authenticated with the key read from OPENAI_API_KEY.
client_oai = OpenAI(api_key=OPENAI_API_KEY)


# OpenSearch client for the cluster at OPENSEARCH_URL, using basic auth.
# TLS stays enabled unless the URL explicitly uses the plain "http://" scheme;
# a hard-coded use_ssl=True would apply to every host, so a cluster served
# over plain HTTP could not be reached.
# NOTE: verify_certs=False turns off certificate validation, which suits a
# local cluster with a self-signed certificate but not a production endpoint.
client = OpenSearch(
    hosts=[OPENSEARCH_URL],
    http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD),
    use_ssl=not OPENSEARCH_URL.lower().startswith("http://"),
    verify_certs=False,
)

# Build the index-creation request body in two parts: settings, then mappings.
index_settings = {"settings": {}, "mappings": {}}

# Index-level settings: k-NN enabled, one shard, no replicas.
index_settings["settings"]["index"] = {
    "knn": True,
    "number_of_shards": 1,
    "number_of_replicas": 0,
}

# Single mapped field: a 1536-dimensional knn_vector using HNSW with the
# l2 space type on the faiss engine.
index_settings["mappings"]["properties"] = {
    "vector_field": {
        "type": "knn_vector",
        "dimension": 1536,
        "method": {"name": "hnsw", "space_type": "l2", "engine": "faiss"},
    }
}

# Create the index and echo the reply returned by the client.
response = client.indices.create(body=index_settings, index=OPENSEARCH_INDEX_NAME)
print(response)


# Insert docs
#
# Each entry of `docs` is embedded with the text-embedding-ada-002 model and
# stored as one document holding the vector ("vector_field") and the
# original text ("text").


for text in docs:
    embedding_response = client_oai.embeddings.create(
        input=text, model="text-embedding-ada-002"
    )

    document = {
        "vector_field": embedding_response.data[0].embedding,
        "text": text,
    }

    # No per-request refresh here: forcing one on every insert repeats the
    # same index-wide operation once per document.
    response = client.index(index=OPENSEARCH_INDEX_NAME, body=document)

    print(response)

# A single refresh after the loop makes all inserted documents searchable.
client.indices.refresh(index=OPENSEARCH_INDEX_NAME)
TEMPLATES Add rag-opensearch template (#13501) <!-- Thank you for contributing to LangChain! Replace this entire comment with: - Description: a description of the change, - Issue: the issue # it fixes (if applicable), - Dependencies: any dependencies required for this change, - Tag maintainer: for a quicker response, tag the relevant maintainer (see below), - Twitter handle: we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/extras` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> Adding rag-opensearch template. --------- Signed-off-by: kalyanr <kalyan.ben10@live.com> Co-authored-by: Erick Friis <erick@langchain.dev> 2023-11-27 21:21:39 +00:00			`import os`

			`from openai import OpenAI`
			`from opensearchpy import OpenSearch`

			`OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")`
			`OPENSEARCH_URL = os.getenv("OPENSEARCH_URL", "https://localhost:9200")`
			`OPENSEARCH_USERNAME = os.getenv("OPENSEARCH_USERNAME", "admin")`
			`OPENSEARCH_PASSWORD = os.getenv("OPENSEARCH_PASSWORD", "admin")`
			`OPENSEARCH_INDEX_NAME = os.getenv("OPENSEARCH_INDEX_NAME", "langchain-test")`

			`with open("dummy_data.txt") as f:`
			`docs = [line.strip() for line in f.readlines()]`


			`client_oai = OpenAI(api_key=OPENAI_API_KEY)`


			`client = OpenSearch(`
			`hosts=[OPENSEARCH_URL],`
			`http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD),`
			`use_ssl=True,`
			`verify_certs=False,`
			`)`

			`# Define the index settings and mappings`
			`index_settings = {`
			`"settings": {`
			`"index": {"knn": True, "number_of_shards": 1, "number_of_replicas": 0}`
			`},`
			`"mappings": {`
			`"properties": {`
			`"vector_field": {`
			`"type": "knn_vector",`
			`"dimension": 1536,`
			`"method": {"name": "hnsw", "space_type": "l2", "engine": "faiss"},`
			`}`
			`}`
			`},`
			`}`

			`response = client.indices.create(index=OPENSEARCH_INDEX_NAME, body=index_settings)`

infra: rm unused # noqa violations (#22049) Updating #21137 2024-05-22 22:21:08 +00:00			`print(response)`
TEMPLATES Add rag-opensearch template (#13501) <!-- Thank you for contributing to LangChain! Replace this entire comment with: - Description: a description of the change, - Issue: the issue # it fixes (if applicable), - Dependencies: any dependencies required for this change, - Tag maintainer: for a quicker response, tag the relevant maintainer (see below), - Twitter handle: we announce bigger features on Twitter. If your PR gets announced, and you'd like a mention, we'll gladly shout you out! Please make sure your PR is passing linting and testing before submitting. Run `make format`, `make lint` and `make test` to check this locally. See contribution guidelines for more information on how to write/run tests, lint, etc: https://github.com/langchain-ai/langchain/blob/master/.github/CONTRIBUTING.md If you're adding a new integration, please include: 1. a test for the integration, preferably unit tests that do not rely on network access, 2. an example notebook showing its use. It lives in `docs/extras` directory. If no one reviews your PR within a few days, please @-mention one of @baskaryan, @eyurtsev, @hwchase17. --> Adding rag-opensearch template. --------- Signed-off-by: kalyanr <kalyan.ben10@live.com> Co-authored-by: Erick Friis <erick@langchain.dev> 2023-11-27 21:21:39 +00:00

			`# Insert docs`


			`for each in docs:`
			`res = client_oai.embeddings.create(input=each, model="text-embedding-ada-002")`

			`document = {`
			`"vector_field": res.data[0].embedding,`
			`"text": each,`
			`}`

			`response = client.index(index=OPENSEARCH_INDEX_NAME, body=document, refresh=True)`

infra: rm unused # noqa violations (#22049) Updating #21137 2024-05-22 22:21:08 +00:00			`print(response)`