forked from Archives/langchain
b3a5b51728
Minor cosmetic changes - Activeloop environment cred authentication in notebooks with `getpass.getpass` (instead of CLI which not always works) - much faster tests with Deep Lake pytest mode on - Deep Lake kwargs pass Notes - I put pytest environment creds inside `vectorstores/conftest.py`, but feel free to suggest a better location. For context, if I put in `test_deeplake.py`, `ruff` doesn't let me to set them before import deeplake --------- Co-authored-by: Davit Buniatyan <d@activeloop.ai>
82 lines
2.9 KiB
Python
82 lines
2.9 KiB
Python
import os
|
|
from typing import Generator, List, Union
|
|
|
|
import pytest
|
|
from vcr.request import Request
|
|
|
|
from langchain.document_loaders import TextLoader
|
|
from langchain.embeddings import OpenAIEmbeddings
|
|
from langchain.schema import Document
|
|
from langchain.text_splitter import CharacterTextSplitter
|
|
|
|
# Those environment variables turn on Deep Lake pytest mode.
|
|
# It significantly makes tests run much faster.
|
|
# Need to run before `import deeplake`
|
|
os.environ["BUGGER_OFF"] = "true"
|
|
os.environ["DEEPLAKE_DOWNLOAD_PATH"] = "./testing/local_storage"
|
|
os.environ["DEEPLAKE_PYTEST_ENABLED"] = "true"
|
|
|
|
|
|
# This fixture returns a dictionary containing filter_headers options
|
|
# for replacing certain headers with dummy values during cassette playback
|
|
# Specifically, it replaces the authorization header with a dummy value to
|
|
# prevent sensitive data from being recorded in the cassette.
|
|
# It also filters request to certain hosts (specified in the `ignored_hosts` list)
|
|
# to prevent data from being recorded in the cassette.
|
|
@pytest.fixture(scope="module")
|
|
def vcr_config() -> dict:
|
|
skipped_host = ["pinecone.io"]
|
|
|
|
def before_record_response(response: dict) -> Union[dict, None]:
|
|
return response
|
|
|
|
def before_record_request(request: Request) -> Union[Request, None]:
|
|
for host in skipped_host:
|
|
if request.host.startswith(host) or request.host.endswith(host):
|
|
return None
|
|
return request
|
|
|
|
return {
|
|
"before_record_request": before_record_request,
|
|
"before_record_response": before_record_response,
|
|
"filter_headers": [
|
|
("authorization", "authorization-DUMMY"),
|
|
("X-OpenAI-Client-User-Agent", "X-OpenAI-Client-User-Agent-DUMMY"),
|
|
("Api-Key", "Api-Key-DUMMY"),
|
|
("User-Agent", "User-Agent-DUMMY"),
|
|
],
|
|
"ignore_localhost": True,
|
|
}
|
|
|
|
|
|
# Define a fixture that yields a generator object returning a list of documents
|
|
@pytest.fixture(scope="function")
|
|
def documents() -> Generator[List[Document], None, None]:
|
|
"""Return a generator that yields a list of documents."""
|
|
|
|
# Create a CharacterTextSplitter object for splitting the documents into chunks
|
|
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
|
|
|
|
# Load the documents from a file located in the fixtures directory
|
|
documents = TextLoader(
|
|
os.path.join(os.path.dirname(__file__), "fixtures", "sharks.txt")
|
|
).load()
|
|
|
|
# Yield the documents split into chunks
|
|
yield text_splitter.split_documents(documents)
|
|
|
|
|
|
@pytest.fixture(scope="function")
|
|
def texts() -> Generator[List[str], None, None]:
|
|
# Load the documents from a file located in the fixtures directory
|
|
documents = TextLoader(
|
|
os.path.join(os.path.dirname(__file__), "fixtures", "sharks.txt")
|
|
).load()
|
|
|
|
yield [doc.page_content for doc in documents]
|
|
|
|
|
|
@pytest.fixture(scope="module")
|
|
def embedding_openai() -> OpenAIEmbeddings:
|
|
return OpenAIEmbeddings()
|