extra test

10 months ago · de6b00e4f2
parent 5b26ba198e
commit de6b00e4f2
2 changed files with 20 additions and 5 deletions
--- a/src/petals/server/memory_cache.py
+++ b/src/petals/server/memory_cache.py
@@ -70,7 +70,7 @@ class MemoryCache:

    @contextlib.asynccontextmanager
    async def allocate_cache(
-        self, *descriptors: TensorDescriptor, timeout: Optional[float]
+        self, *descriptors: TensorDescriptor, timeout: float
    ) -> AsyncContextManager[Sequence[Handle]]:
        """
        Create a handle that is associated with buffers on unique device. If cache full, raises AllocationFailed.
@@ -87,7 +87,7 @@ class MemoryCache:
        assert os.getpid() != self.runtime_pid, "must be called by a ConnectionHandler, not runtime"
        assert all(descr.device is not None for descr in descriptors), "please specify allocated devices"
        if self.max_alloc_timeout is not None:
-            timeout = min(timeout, self.max_alloc_timeout) if timeout is not None else self.max_alloc_timeout
+            timeout = min(timeout, self.max_alloc_timeout)
        max_alloc_size = self.get_allocation_size(*descriptors)

        gib = 1024**3
@@ -116,7 +116,7 @@ class MemoryCache:
        return max(alloc_size_by_device.values())

    async def _schedule_alloc(
-        self, alloc_size: int, *descriptors: TensorDescriptor, timeout: Optional[float]
+        self, alloc_size: int, *descriptors: TensorDescriptor, timeout: float
    ) -> Sequence[Handle]:
        """
        This method should be called inside asyncio.shield() because:
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -36,7 +36,7 @@ async def test_cache_timeout():
                    async with cache.allocate_cache(_make_tensor_descriptor(768), timeout=0.1):
                        pass
                assert 0.1 < time.perf_counter() - t_start < 0.2, "wait time exceeds alloc timeout"
-                async with cache.allocate_cache(_make_tensor_descriptor(128), timeout=None):
+                async with cache.allocate_cache(_make_tensor_descriptor(128), timeout=float('inf')):
                    pass

                t_start = time.perf_counter()
@@ -54,7 +54,7 @@ async def test_cache_timeout():

            t_start = time.perf_counter()
            await asyncio.sleep(0.05)  # wait for large alloc to enqueue
-            async with cache.allocate_cache(_make_tensor_descriptor(128), timeout=None):  # exceeds max timeout
+            async with cache.allocate_cache(_make_tensor_descriptor(128), timeout=float('inf')):  # exceeds max timeout
                pass  # this memory should allocate once the background task clears the queue
            assert 0.2 < time.perf_counter() - t_start < 0.3, "memory should be allocated after background task clears"
            with pytest.raises(AllocationFailed):
@@ -72,6 +72,21 @@ async def test_cache_timeout():
                await large_alloc_task


+@pytest.mark.asyncio
+async def test_unlimited_timeout():
+    cache = MemoryCache(max_size_bytes=1024)
+    cache.runtime_pid += 1  # pretend we're another process
+    t_start = time.perf_counter()  # reference point for the elapsed-time assertion at the end
+
+    async def _klog_the_cache():  # background task: takes an allocation and holds it while sleeping
+        async with cache.allocate_cache(_make_tensor_descriptor(512), timeout=0.2):
+            await asyncio.sleep(0.5)  # keep the allocation held for 0.5 s before the context exits
+    alloc_task = asyncio.create_task(_klog_the_cache())  # scheduled to run concurrently with the allocation below
+    async with cache.allocate_cache(_make_tensor_descriptor(768), timeout=float('inf')):  # infinite timeout; presumably cannot fit alongside the 512 allocation -- TODO confirm sizes against _make_tensor_descriptor
+        await alloc_task  # also surfaces any exception raised inside the background task
+    assert 0.5 < time.perf_counter() - t_start < 0.6, "memory should be allocated after background task clears"
+
+
@pytest.mark.asyncio
 async def test_cache_usage():
    cache = MemoryCache(max_size_bytes=2048)