review

9 months ago · 4caae8f8b9
parent 79245587fe
commit 4caae8f8b9
2 changed files with 6 additions and 3 deletions
--- a/src/petals/server/memory_cache.py
+++ b/src/petals/server/memory_cache.py
@@ -116,7 +116,7 @@ class MemoryCache:
        return max(alloc_size_by_device.values())

    async def _schedule_alloc(
-        self, alloc_size: int, *descriptors: TensorDescriptor, timeout: float
+        self, alloc_size: int, *descriptors: TensorDescriptor, timeout: Optional[float]
    ) -> Sequence[Handle]:
        """
        This method should be called inside asyncio.shield() because:
@@ -124,7 +124,7 @@ class MemoryCache:
        """
        try:
            async with self._wait_for_free_memory(alloc_size, timeout):
-                async with enter_asynchronously(self._lock_metadata):
+                with self._lock_metadata:
                    handles = tuple(int(self.handle_counter) + i for i in range(len(descriptors)))
                    self.current_size_bytes += alloc_size
                    self.handle_counter += len(handles)  # note: this will eventually overflow and it is okay
@@ -179,9 +179,10 @@ class MemoryCache:
            raise AllocationFailed(
                f"Could not allocate {allocated_size} bytes, max cache size = {self.max_size_bytes} bytes"
            )
+        timeout = timeout if timeout != float('inf') else None
        deadline = None if timeout is None else time.perf_counter() + timeout
        while self.current_size_bytes + allocated_size > self.max_size_bytes:
-            remaining_time = deadline - time.perf_counter() if timeout is not None else None
+            remaining_time = None if timeout is None else deadline - time.perf_counter()
            if not self._memory_freed_event.wait(remaining_time):
                raise AllocationFailed(
                    f"Server's attention cache is full, failed to allocate {allocated_size} bytes in {timeout} seconds"
--- a/tests/test_cache.py
+++ b/tests/test_cache.py
@@ -83,6 +83,7 @@ async def test_unlimited_timeout():
            await asyncio.sleep(0.5)

    alloc_task = asyncio.create_task(_klog_the_cache())
+    await asyncio.sleep(0.1)
    async with cache.allocate_cache(_make_tensor_descriptor(768), timeout=float("inf")):
        await alloc_task
    assert 0.5 < time.perf_counter() - t_start < 0.6, "memory should be allocated after background task clears"
@@ -160,6 +161,7 @@ async def test_cache_usage():

    dealloc_a_event.set()
    (handle_e,) = pipe_receiver.recv()  # e can finally be allocated
+    await asyncio.sleep(0.1)
    assert cache.current_size_bytes == 1536  # tensor e should finally be able to allocate

    with pytest.raises(KeyError):