langchain/templates/rag-opensearch/dummy_index_setup.py

import os

from openai import OpenAI
from opensearchpy import OpenSearch

OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

OPENSEARCH_URL = os.getenv("OPENSEARCH_URL", "https://localhost:9200")
OPENSEARCH_USERNAME = os.getenv("OPENSEARCH_USERNAME", "admin")
OPENSEARCH_PASSWORD = os.getenv("OPENSEARCH_PASSWORD", "admin")
OPENSEARCH_INDEX_NAME = os.getenv("OPENSEARCH_INDEX_NAME", "langchain-test")

with open("dummy_data.txt") as f:
    docs = [line.strip() for line in f.readlines()]

client_oai = OpenAI(api_key=OPENAI_API_KEY)

client = OpenSearch(
    hosts=[OPENSEARCH_URL],
    http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD),
    use_ssl=True,
    verify_certs=False,  # skip TLS cert verification (e.g. a local self-signed cert)
)

# Define the index settings and mappings
index_settings = {
    "settings": {
        "index": {"knn": True, "number_of_shards": 1, "number_of_replicas": 0}
    },
    "mappings": {
        "properties": {
            "vector_field": {
                "type": "knn_vector",
                # 1536 matches the output dimension of text-embedding-ada-002
                "dimension": 1536,
                "method": {"name": "hnsw", "space_type": "l2", "engine": "faiss"},
            }
        }
    },
}

response = client.indices.create(index=OPENSEARCH_INDEX_NAME, body=index_settings)
print(response) # noqa: T201

# Insert docs
for each in docs:
    res = client_oai.embeddings.create(input=each, model="text-embedding-ada-002")

    document = {
        "vector_field": res.data[0].embedding,
        "text": each,
    }

    response = client.index(index=OPENSEARCH_INDEX_NAME, body=document, refresh=True)
    print(response)  # noqa: T201
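
# Optional sanity check (a minimal sketch): embed an arbitrary query string with
# the same embedding model and run a k-NN search against the index created above.
# The query text here is just a placeholder, not part of dummy_data.txt.
query_res = client_oai.embeddings.create(
    input="sample query", model="text-embedding-ada-002"
)
knn_query = {
    "size": 1,
    "query": {
        "knn": {"vector_field": {"vector": query_res.data[0].embedding, "k": 1}}
    },
}
print(client.search(index=OPENSEARCH_INDEX_NAME, body=knn_query))  # noqa: T201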