mirror of
https://github.com/hwchase17/langchain
synced 2024-11-06 03:20:49 +00:00
64 lines
1.5 KiB
Python
64 lines
1.5 KiB
Python
"""Test Vertex AI embeddings API wrapper.
"""
|
||
|
|
||
|
from langchain_community.embeddings import VertexAIEmbeddings
|
||
|
|
||
|
|
||
|
def test_split_by_punctuation() -> None:
    """Check the pieces ``_split_by_punctuation`` returns for a sample text.

    The expected list keeps every word, punctuation mark and whitespace
    character of the sample as a separate element, in input order.
    """
    sample = "Hello, my friend!\nHow are you?\nI have 2 news:\n\n\t- Good,\n\t- Bad."
    # One row per line of the sample text, to make the list easy to compare.
    expected = [
        "Hello", ",", " ", "my", " ", "friend", "!", "\n",
        "How", " ", "are", " ", "you", "?", "\n",
        "I", " ", "have", " ", "2", " ", "news", ":", "\n",
        "\n",
        "\t", "-", " ", "Good", ",", "\n",
        "\t", "-", " ", "Bad", ".",
    ]

    pieces = VertexAIEmbeddings._split_by_punctuation(sample)

    assert pieces == expected
|
||
|
|
||
|
|
||
|
def test_batching() -> None:
    """Check how ``_prepare_batches`` groups texts for several batch sizes.

    Two inputs are used: 250 copies of a long text and 251 copies of a
    short one. Only the number of batches and their lengths are asserted.
    """
    # Original author's estimate for this text: 1000 words, 2000 tokens.
    long_text = "foo " * 500
    many_long = [long_text] * 250
    many_short = ["foo bar"] * 251

    by_five = VertexAIEmbeddings._prepare_batches(many_long, 5)
    by_250 = VertexAIEmbeddings._prepare_batches(many_long, 250)
    short_batches = VertexAIEmbeddings._prepare_batches(many_short, 250)

    # Small batch size: 250 texts in batches of 5.
    assert len(by_five) == 50  # 250/5 items
    assert len(by_five[0]) == 5  # 5 items per batch

    # Large batch size with long texts: batches come out smaller than 250.
    assert len(by_250[0]) == 10  # Should not be more than 20K tokens
    assert len(by_250) == 25

    # Short texts: one full batch of 250, then the single leftover item.
    assert len(short_batches[0]) == 250
    assert len(short_batches[1]) == 1
|