import getpass import os from langchain.document_loaders import PyPDFLoader from langchain.text_splitter import CharacterTextSplitter from langchain.vectorstores.milvus import Milvus from langchain_nvidia_aiplay import NVIDIAEmbeddings if os.environ.get("NVIDIA_API_KEY", "").startswith("nvapi-"): print("Valid NVIDIA_API_KEY already in environment. Delete to reset") else: nvapi_key = getpass.getpass("NVAPI Key (starts with nvapi-): ") assert nvapi_key.startswith("nvapi-"), f"{nvapi_key[:5]}... is not a valid key" os.environ["NVIDIA_API_KEY"] = nvapi_key # Note: if you change this, you should also change it in `nvidia_rag_canonical/chain.py` EMBEDDING_MODEL = "nvolveqa_40k" HOST = "127.0.0.1" PORT = "19530" COLLECTION_NAME = "test" embeddings = NVIDIAEmbeddings(model=EMBEDDING_MODEL) if __name__ == "__main__": # Load docs loader = PyPDFLoader("https://www.ssa.gov/news/press/factsheets/basicfact-alt.pdf") data = loader.load() # Split docs text_splitter = CharacterTextSplitter(chunk_size=300, chunk_overlap=100) docs = text_splitter.split_documents(data) # Insert the documents in Milvus Vector Store vector_db = Milvus.from_documents( docs, embeddings, collection_name=COLLECTION_NAME, connection_args={"host": HOST, "port": PORT}, )