2022-11-07 13:46:44 +00:00
|
|
|
"""Test huggingface embeddings."""
|
2022-11-09 21:26:58 +00:00
|
|
|
|
2023-12-11 21:53:30 +00:00
|
|
|
from langchain_community.embeddings.huggingface import (
|
2023-02-02 16:44:02 +00:00
|
|
|
HuggingFaceEmbeddings,
|
|
|
|
HuggingFaceInstructEmbeddings,
|
|
|
|
)
|
2022-11-07 13:46:44 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_huggingface_embedding_documents() -> None:
    """Embed a one-item document list with default HuggingFaceEmbeddings.

    Expects exactly one vector back, and that vector to have 768 entries.
    """
    embedder = HuggingFaceEmbeddings()
    vectors = embedder.embed_documents(["foo bar"])
    # One input document must produce one embedding ...
    assert len(vectors) == 1
    # ... of the expected dimensionality.
    assert len(vectors[0]) == 768
|
|
|
|
|
|
|
|
|
|
|
|
def test_huggingface_embedding_query() -> None:
    """Embed a single query string and check the vector length.

    The embedder is constructed with ``encode_kwargs={"batch_size": 16}``;
    the returned query vector must have 768 entries.
    """
    embedder = HuggingFaceEmbeddings(encode_kwargs={"batch_size": 16})
    vector = embedder.embed_query("foo bar")
    assert len(vector) == 768
|
2023-02-02 16:44:02 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_huggingface_instructor_embedding_documents() -> None:
    """Embed a one-item document list with HuggingFaceInstructEmbeddings.

    Uses the ``hkunlp/instructor-base`` model and expects exactly one vector
    back, with 768 entries.
    """
    embedder = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")
    vectors = embedder.embed_documents(["foo bar"])
    # One input document must produce one embedding ...
    assert len(vectors) == 1
    # ... of the expected dimensionality.
    assert len(vectors[0]) == 768
|
|
|
|
|
|
|
|
|
|
|
|
def test_huggingface_instructor_embedding_query() -> None:
    """Embed a single query string with HuggingFaceInstructEmbeddings.

    Uses the ``hkunlp/instructor-base`` model and expects the returned query
    vector to have 768 entries.
    """
    embedder = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-base")
    vector = embedder.embed_query("foo bar")
    assert len(vector) == 768
|
2023-05-30 18:57:04 +00:00
|
|
|
|
|
|
|
|
|
|
|
def test_huggingface_instructor_embedding_normalize() -> None:
    """Check that ``normalize_embeddings=True`` gives a unit-length vector.

    Builds HuggingFaceInstructEmbeddings for ``hkunlp/instructor-base`` with
    ``encode_kwargs={"normalize_embeddings": True}``, embeds one query, and
    asserts that the vector has 768 entries and that its sum of squares is
    within 1e-5 of 1.
    """
    query = "foo bar"
    model_name = "hkunlp/instructor-base"
    encode_kwargs = {"normalize_embeddings": True}
    embedding = HuggingFaceInstructEmbeddings(
        model_name=model_name, encode_kwargs=encode_kwargs
    )
    output = embedding.embed_query(query)
    assert len(output) == 768
    eps = 1e-5
    # The sum of squares is the *squared* L2 norm; for a unit vector it is 1,
    # so no square root is needed. A generator expression avoids building a
    # throwaway 768-element list just to sum it.
    squared_norm = sum(o**2 for o in output)
    assert abs(1 - squared_norm) <= eps
|