# Mirror of https://github.com/hwchase17/langchain
# Synced 2024-10-31 15:20:26 +00:00 at commit 3a2eb6e12b:
#   "Added noqa for existing prints. Can slowly remove / will prevent more being intro'd"
# File: 61 lines, 1.5 KiB, Python
import os
|
|
|
|
from openai import OpenAI
|
|
from opensearchpy import OpenSearch
|
|
|
|
# Connection settings, all read from environment variables.

# API key handed to the OpenAI client; None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

# OpenSearch endpoint and basic-auth credentials. When the variables are not
# set, these fall back to https://localhost:9200 with admin/admin.
OPENSEARCH_URL = os.getenv("OPENSEARCH_URL", "https://localhost:9200")
OPENSEARCH_USERNAME = os.getenv("OPENSEARCH_USERNAME", "admin")
OPENSEARCH_PASSWORD = os.getenv("OPENSEARCH_PASSWORD", "admin")

# Name of the index this script creates and fills.
OPENSEARCH_INDEX_NAME = os.getenv("OPENSEARCH_INDEX_NAME", "langchain-test")
|
|
|
|
# Load the sample texts: every non-blank line of dummy_data.txt becomes one
# entry of `docs`, with surrounding whitespace removed.
# Blank lines are dropped because an empty string is not a valid input for
# the embeddings request that is made for each entry later in the script.
with open("dummy_data.txt", encoding="utf-8") as f:
    # Iterate the file lazily instead of materialising it with readlines().
    stripped_lines = (line.strip() for line in f)
    docs = [text for text in stripped_lines if text]
|
|
|
|
|
|
# OpenAI client; the script uses it to request embeddings.
client_oai = OpenAI(api_key=OPENAI_API_KEY)

# OpenSearch client, authenticated with HTTP basic auth.
# NOTE(review): TLS is enabled but certificate verification is turned off
# (verify_certs=False) -- presumably so that a self-signed certificate on a
# local test cluster is accepted. Do not reuse this configuration against a
# real deployment without re-enabling verification.
client = OpenSearch(
    hosts=[OPENSEARCH_URL],
    http_auth=(OPENSEARCH_USERNAME, OPENSEARCH_PASSWORD),
    use_ssl=True,
    verify_certs=False,
)
|
|
|
|
# Length of the vectors stored in "vector_field". It has to equal the length of
# the embeddings that get indexed; text-embedding-ada-002, the model this
# script requests embeddings from, returns 1536-dimensional vectors.
EMBEDDING_DIMENSION = 1536

# Define the index settings and mappings: a k-NN enabled index with one shard
# and no replicas, whose "vector_field" is an HNSW-indexed knn_vector (faiss
# engine, L2 distance).
index_settings = {
    "settings": {
        "index": {"knn": True, "number_of_shards": 1, "number_of_replicas": 0}
    },
    "mappings": {
        "properties": {
            "vector_field": {
                "type": "knn_vector",
                "dimension": EMBEDDING_DIMENSION,
                "method": {"name": "hnsw", "space_type": "l2", "engine": "faiss"},
            }
        }
    },
}
|
|
|
|
# Create the index with the settings and mappings defined above and show the
# cluster's reply.
# NOTE(review): there is no existence check, so running the script again
# against a cluster that already has this index is expected to fail here --
# which also keeps the documents from being inserted a second time.
response = client.indices.create(
    index=OPENSEARCH_INDEX_NAME,
    body=index_settings,
)
print(response)  # noqa: T201
|
|
|
|
|
|
# Insert docs: request an embedding for every text and index the vector
# together with the original text.
EMBEDDING_MODEL = "text-embedding-ada-002"

for each in docs:
    res = client_oai.embeddings.create(input=each, model=EMBEDDING_MODEL)

    # One input string was sent, so the single embedding is at data[0].
    document = {
        "vector_field": res.data[0].embedding,
        "text": each,
    }

    # No per-document refresh: forcing one for every insert is wasted work
    # when loading many documents. A single refresh follows the loop.
    response = client.index(index=OPENSEARCH_INDEX_NAME, body=document)
    print(response)  # noqa: T201

# Make everything inserted above visible to search in one step.
client.indices.refresh(index=OPENSEARCH_INDEX_NAME)
|