Mirror of https://github.com/hwchase17/langchain, synced 2024-11-10 01:10:59 +00:00
core[patch]: Relax rate limit unit tests in terms of timing (#25140)

Relax rate limit unit tests

parent a2e9910268
commit 28e0958ff4
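For context on why the relaxed bounds hold, below is a minimal sketch of the token-bucket timing these tests rely on. It is illustrative only, not the repository's test file: it mirrors the constructor calls that appear in the diff, and the `langchain_core` import paths are assumed for the version this change targets.

# Illustrative sketch only; import paths are an assumption for langchain_core at the time of this change.
import time

from langchain_core.language_models import GenericFakeChatModel
from langchain_core.rate_limiters import InMemoryRateLimiter

model = GenericFakeChatModel(
    messages=iter(["hello", "world"]),
    rate_limiter=InMemoryRateLimiter(
        requests_per_second=20,  # one new token roughly every 0.05 seconds
        check_every_n_seconds=0.1,  # the limiter re-checks the bucket every 0.1 seconds
        max_bucket_size=10,
    ),
)

# The bucket starts with 0 tokens, so the first call blocks for at least one
# check interval (~0.1 s) before a token becomes available.
tic = time.time()
model.invoke("foo")
print(f"first call waited {time.time() - tic:.3f}s")  # expect roughly 0.10-0.15 s

# While the first call slept, extra tokens accumulated, so the second call
# usually proceeds without waiting a full check interval.
tic = time.time()
model.invoke("foo")
print(f"second call waited {time.time() - tic:.3f}s")  # expect well under 0.10 s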
@@ -9,9 +9,12 @@ def test_rate_limit_invoke() -> None:
     """Add rate limiter."""

     model = GenericFakeChatModel(
-        messages=iter(["hello", "world", "!"]),
+        messages=iter(["hello", "world"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     tic = time.time()
@@ -19,22 +22,14 @@ def test_rate_limit_invoke() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.10 < toc - tic < 0.15

     tic = time.time()
     model.invoke("foo")
     toc = time.time()
-    # The second time we call the model, we should have 1 extra token
-    # to proceed immediately.
-    assert toc - tic < 0.005
-
-    # The third time we call the model, we need to wait again for a token
-    tic = time.time()
-    model.invoke("foo")
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    # Second time we check the model, we should have 1 extra token
+    # since the sleep time is 0.1 seconds
+    assert 0.00 < toc - tic < 0.10


 async def test_rate_limit_ainvoke() -> None:
@@ -43,7 +38,10 @@ async def test_rate_limit_ainvoke() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     tic = time.time()
@@ -58,7 +56,7 @@ async def test_rate_limit_ainvoke() -> None:
     toc = time.time()
     # The second time we call the model, we should have 1 extra token
     # to proceed immediately.
-    assert toc - tic < 0.01
+    assert toc - tic < 0.1

     # The third time we call the model, we need to wait again for a token
     tic = time.time()
@@ -74,17 +72,16 @@ def test_rate_limit_batch() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.01,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
-    # Need 2 tokens to proceed
-    time_to_fill = 2 / 200.0
     tic = time.time()
     model.batch(["foo", "foo"])
     toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert time_to_fill < toc - tic < time_to_fill + 0.03
+    assert 0.1 < toc - tic < 0.2


 async def test_rate_limit_abatch() -> None:
@@ -92,17 +89,16 @@ async def test_rate_limit_abatch() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.01,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
-    # Need 2 tokens to proceed
-    time_to_fill = 2 / 200.0
     tic = time.time()
     await model.abatch(["foo", "foo"])
     toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    # with 0 tokens.
-    assert time_to_fill < toc - tic < time_to_fill + 0.03
+    assert 0.1 < toc - tic < 0.2


 def test_rate_limit_stream() -> None:
@@ -110,7 +106,10 @@ def test_rate_limit_stream() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello world", "hello world", "hello world"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=200, check_every_n_seconds=0.01, max_bucket_size=10
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
     )
     # Check astream
@@ -119,52 +118,54 @@ def test_rate_limit_stream() -> None:
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
-    assert 0.01 < toc - tic < 0.02  # Slightly smaller than check every n seconds
-
-    # Second time around we should have 1 token left
-    tic = time.time()
-    response = list(model.stream("foo"))
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    assert toc - tic < 0.005  # Slightly smaller than check every n seconds
-
-    # Third time around we should have 0 tokens left
-    tic = time.time()
-    response = list(model.stream("foo"))
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
-    # Should be larger than check every n seconds since the token bucket starts
-    assert 0.01 < toc - tic < 0.02  # Slightly smaller than check every n seconds
-
-
-async def test_rate_limit_astream() -> None:
-    """Test rate limiting astream."""
-    rate_limiter = InMemoryRateLimiter(
-        requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=10
-    )
-    model = GenericFakeChatModel(
-        messages=iter(["hello world", "hello world", "hello world"]),
-        rate_limiter=rate_limiter,
-    )
-    # Check astream
-    tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
-    assert [msg.content for msg in response] == ["hello", " ", "world"]
-    toc = time.time()
     assert 0.1 < toc - tic < 0.2

     # Second time around we should have 1 token left
     tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
+    response = list(model.stream("foo"))
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
-    assert toc - tic < 0.01  # Slightly smaller than check every n seconds
+    assert toc - tic < 0.1  # Slightly smaller than check every n seconds

     # Third time around we should have 0 tokens left
     tic = time.time()
-    response = [chunk async for chunk in model.astream("foo")]
+    response = list(model.stream("foo"))
     assert [msg.content for msg in response] == ["hello", " ", "world"]
     toc = time.time()
     assert 0.1 < toc - tic < 0.2
+
+
+async def test_rate_limit_astream() -> None:
+    """Test rate limiting astream."""
+    model = GenericFakeChatModel(
+        messages=iter(["hello world", "hello world", "hello world"]),
+        rate_limiter=InMemoryRateLimiter(
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=10,
+            # At 20 requests per second we see a refresh every 0.05 seconds
+        ),
+    )
+    # Check astream
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    # Should be larger than check every n seconds since the token bucket starts
+    assert 0.1 < toc - tic < 0.2
+
+    # Second time around we should have 1 token left
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    # Should be larger than check every n seconds since the token bucket starts
+    assert toc - tic < 0.1  # Slightly smaller than check every n seconds
+
+    # Third time around we should have 0 tokens left
+    tic = time.time()
+    response = [msg async for msg in model.astream("foo")]
+    assert [msg.content for msg in response] == ["hello", " ", "world"]
+    toc = time.time()
+    assert 0.1 < toc - tic < 0.2
@@ -176,7 +177,10 @@ def test_rate_limit_skips_cache() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=100, check_every_n_seconds=0.01, max_bucket_size=1
+            requests_per_second=20,
+            check_every_n_seconds=0.1,
+            max_bucket_size=1,
+            # At 20 requests per second we see a refresh every 0.05 seconds
         ),
         cache=cache,
     )
@@ -186,7 +190,7 @@ def test_rate_limit_skips_cache() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.1 < toc - tic < 0.2

     for _ in range(2):
         # Cache hits
@@ -195,7 +199,7 @@ def test_rate_limit_skips_cache() -> None:
         toc = time.time()
         # Should be larger than check every n seconds since the token bucket starts
         # with 0 tokens.
-        assert toc - tic < 0.005
+        assert toc - tic < 0.05

     # Test verifies that there's only a single key
     # Test also verifies that rate_limiter information is not part of the
@@ -236,7 +240,7 @@ async def test_rate_limit_skips_cache_async() -> None:
     model = GenericFakeChatModel(
         messages=iter(["hello", "world", "!"]),
         rate_limiter=InMemoryRateLimiter(
-            requests_per_second=100, check_every_n_seconds=0.01, max_bucket_size=1
+            requests_per_second=20, check_every_n_seconds=0.1, max_bucket_size=1
         ),
         cache=cache,
     )
@@ -246,7 +250,7 @@ async def test_rate_limit_skips_cache_async() -> None:
     toc = time.time()
     # Should be larger than check every n seconds since the token bucket starts
     # with 0 tokens.
-    assert 0.01 < toc - tic < 0.02
+    assert 0.1 < toc - tic < 0.2

     for _ in range(2):
         # Cache hits
@@ -255,4 +259,4 @@ async def test_rate_limit_skips_cache_async() -> None:
         toc = time.time()
         # Should be larger than check every n seconds since the token bucket starts
         # with 0 tokens.
-        assert toc - tic < 0.005
+        assert toc - tic < 0.05