langchain/tests/integration_tests/embeddings/test_openai.py
Brendan Graham d718f3b6d0
feat: interfaces for async embeddings, implement async openai (#6563)
Since it seems like #6111 will be blocked for a bit, I've forked
@tyree731's fork and implemented the requested changes.

This change adds support to the base Embeddings class for two methods,
aembed_query and aembed_documents, those two methods supporting async
equivalents of embed_query and
embed_documents respectively. This ever so slightly rounds out async
support within langchain, with an initial implementation of this
functionality being implemented for openai.

Implements https://github.com/hwchase17/langchain/issues/6109

---------

Co-authored-by: Stephen Tyree <tyree731@gmail.com>
2023-06-21 23:16:33 -07:00

72 lines
2.2 KiB
Python

"""Test openai embeddings."""
import numpy as np
import openai
import pytest
from langchain.embeddings.openai import OpenAIEmbeddings
def test_openai_embedding_documents() -> None:
"""Test openai embeddings."""
documents = ["foo bar"]
embedding = OpenAIEmbeddings()
output = embedding.embed_documents(documents)
assert len(output) == 1
assert len(output[0]) == 1536
def test_openai_embedding_documents_multiple() -> None:
"""Test openai embeddings."""
documents = ["foo bar", "bar foo", "foo"]
embedding = OpenAIEmbeddings(chunk_size=2)
embedding.embedding_ctx_length = 8191
output = embedding.embed_documents(documents)
assert len(output) == 3
assert len(output[0]) == 1536
assert len(output[1]) == 1536
assert len(output[2]) == 1536
@pytest.mark.asyncio
async def test_openai_embedding_documents_async_multiple() -> None:
"""Test openai embeddings."""
documents = ["foo bar", "bar foo", "foo"]
embedding = OpenAIEmbeddings(chunk_size=2)
embedding.embedding_ctx_length = 8191
output = await embedding.aembed_documents(documents)
assert len(output) == 3
assert len(output[0]) == 1536
assert len(output[1]) == 1536
assert len(output[2]) == 1536
def test_openai_embedding_query() -> None:
"""Test openai embeddings."""
document = "foo bar"
embedding = OpenAIEmbeddings()
output = embedding.embed_query(document)
assert len(output) == 1536
@pytest.mark.asyncio
async def test_openai_embedding_async_query() -> None:
"""Test openai embeddings."""
document = "foo bar"
embedding = OpenAIEmbeddings()
output = await embedding.aembed_query(document)
assert len(output) == 1536
def test_openai_embedding_with_empty_string() -> None:
"""Test openai embeddings with empty string."""
document = ["", "abc"]
embedding = OpenAIEmbeddings()
output = embedding.embed_documents(document)
assert len(output) == 2
assert len(output[0]) == 1536
expected_output = openai.Embedding.create(input="", model="text-embedding-ada-002")[
"data"
][0]["embedding"]
assert np.allclose(output[0], expected_output)
assert len(output[1]) == 1536