langchain/templates/rag-momento-vector-index/rag_momento_vector_index/ingest.py
Bagatur fa5d49f2c1
docs, experimental[patch], langchain[patch], community[patch]: update storage imports (#15429)
ran 
```bash
g grep -l "langchain.vectorstores" | xargs -L 1 sed -i '' "s/langchain\.vectorstores/langchain_community.vectorstores/g"
g grep -l "langchain.document_loaders" | xargs -L 1 sed -i '' "s/langchain\.document_loaders/langchain_community.document_loaders/g"
g grep -l "langchain.chat_loaders" | xargs -L 1 sed -i '' "s/langchain\.chat_loaders/langchain_community.chat_loaders/g"
g grep -l "langchain.document_transformers" | xargs -L 1 sed -i '' "s/langchain\.document_transformers/langchain_community.document_transformers/g"
g grep -l "langchain\.graphs" | xargs -L 1 sed -i '' "s/langchain\.graphs/langchain_community.graphs/g"
g grep -l "langchain\.memory\.chat_message_histories" | xargs -L 1 sed -i '' "s/langchain\.memory\.chat_message_histories/langchain_community.chat_message_histories/g"
gco master libs/langchain/tests/unit_tests/*/test_imports.py
gco master libs/langchain/tests/unit_tests/**/test_public_api.py
```
2024-01-02 16:47:11 -05:00

39 lines
1.3 KiB
Python

### Ingest code - you may need to run this the first time
import os
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import WebBaseLoader
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores import MomentoVectorIndex
from momento import (
CredentialProvider,
PreviewVectorIndexClient,
VectorIndexConfigurations,
)
def load(API_KEY_ENV_VAR_NAME: str, index_name: str) -> None:
if os.environ.get(API_KEY_ENV_VAR_NAME, None) is None:
raise Exception(f"Missing `{API_KEY_ENV_VAR_NAME}` environment variable.")
# Load
loader = WebBaseLoader("https://lilianweng.github.io/posts/2023-06-23-agent/")
data = loader.load()
# Split
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)
# Add to vectorDB
MomentoVectorIndex.from_documents(
all_splits,
embedding=OpenAIEmbeddings(),
client=PreviewVectorIndexClient(
configuration=VectorIndexConfigurations.Default.latest(),
credential_provider=CredentialProvider.from_environment_variable(
API_KEY_ENV_VAR_NAME
),
),
index_name=index_name,
)