langchain/tests/integration_tests/vectorstores/conftest.py
sergerdn 04c458a270
feat: improve pinecone tests (#2806)
Improve the integration tests for Pinecone by adding an `.env.example`
file for local testing. Additionally, add some dev dependencies
specifically for integration tests.

This change also helps me understand how Pinecone deals with certain
things, see related issues
https://github.com/hwchase17/langchain/issues/2484
https://github.com/hwchase17/langchain/issues/2816
2023-04-13 21:49:31 -07:00

75 lines
2.6 KiB
Python

import os
from typing import Generator, List, Union
import pytest
from vcr.request import Request
from langchain.document_loaders import TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.schema import Document
from langchain.text_splitter import CharacterTextSplitter
# This fixture returns a dictionary containing filter_headers options
# for replacing certain headers with dummy values during cassette playback
# Specifically, it replaces the authorization header with a dummy value to
# prevent sensitive data from being recorded in the cassette.
# It also filters request to certain hosts (specified in the `ignored_hosts` list)
# to prevent data from being recorded in the cassette.
@pytest.fixture(scope="module")
def vcr_config() -> dict:
skipped_host = ["pinecone.io"]
def before_record_response(response: dict) -> Union[dict, None]:
return response
def before_record_request(request: Request) -> Union[Request, None]:
for host in skipped_host:
if request.host.startswith(host) or request.host.endswith(host):
return None
return request
return {
"before_record_request": before_record_request,
"before_record_response": before_record_response,
"filter_headers": [
("authorization", "authorization-DUMMY"),
("X-OpenAI-Client-User-Agent", "X-OpenAI-Client-User-Agent-DUMMY"),
("Api-Key", "Api-Key-DUMMY"),
("User-Agent", "User-Agent-DUMMY"),
],
"ignore_localhost": True,
}
# Define a fixture that yields a generator object returning a list of documents
@pytest.fixture(scope="function")
def documents() -> Generator[List[Document], None, None]:
"""Return a generator that yields a list of documents."""
# Create a CharacterTextSplitter object for splitting the documents into chunks
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# Load the documents from a file located in the fixtures directory
documents = TextLoader(
os.path.join(os.path.dirname(__file__), "fixtures", "sharks.txt")
).load()
# Yield the documents split into chunks
yield text_splitter.split_documents(documents)
@pytest.fixture(scope="function")
def texts() -> Generator[List[str], None, None]:
# Load the documents from a file located in the fixtures directory
documents = TextLoader(
os.path.join(os.path.dirname(__file__), "fixtures", "sharks.txt")
).load()
yield [doc.page_content for doc in documents]
@pytest.fixture(scope="module")
def embedding_openai() -> OpenAIEmbeddings:
return OpenAIEmbeddings()