langchain/libs/community/tests/integration_tests/test_pdf_pagesplitter.py

20 lines
744 B
Python
Raw Normal View History

2023-07-21 16:20:24 +00:00
"""Test splitting with page numbers included."""
import os
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
2023-07-21 16:20:24 +00:00
def test_pdf_pagesplitter() -> None:
    """Check that PDF pages keep their metadata and are searchable via FAISS.

    Loads ``examples/hello.pdf`` (resolved relative to this test file) with
    ``PyPDFLoader``, verifies the first loaded document carries ``page`` and
    ``source`` metadata keys, indexes all loaded documents in a FAISS store
    using ``OpenAIEmbeddings``, and asserts that the top similarity hit
    contains the text ``Hello world``.

    NOTE(review): ``OpenAIEmbeddings()`` is built with no arguments, so this
    presumably needs OpenAI credentials from the environment -- confirm.
    """
    # Anchor the fixture path to this file so the test does not depend on
    # the current working directory.
    here = os.path.dirname(__file__)
    pdf_path = os.path.join(here, "examples/hello.pdf")
    pages = PyPDFLoader(pdf_path).load()

    # The loader is expected to tag each document with where it came from.
    first_page = pages[0]
    assert "page" in first_page.metadata
    assert "source" in first_page.metadata

    index = FAISS.from_documents(
        pages,
        OpenAIEmbeddings(),
    )
    hits = index.similarity_search("Complete this sentence: Hello", k=1)
    best_hit = hits[0]
    assert "Hello world" in best_hit.page_content