# petals/src/petals/utils/disk_cache.py

import fcntl
import os
import shutil
from contextlib import contextmanager
from pathlib import Path
from typing import Optional

import huggingface_hub
from hivemind.utils.logging import get_logger

# Module-level logger named after this module
logger = get_logger(__name__)

# Cache directory used when a caller passes cache_dir=None: the value of the PETALS_CACHE
# environment variable if it is set, otherwise ~/.cache/petals.
# NOTE(review): this is a str when read from the environment and a Path otherwise.
DEFAULT_CACHE_DIR = os.getenv("PETALS_CACHE", Path(Path.home(), ".cache", "petals"))

# Name of the lock file that _blocks_lock() creates inside the cache directory
BLOCKS_LOCK_FILE = "blocks.lock"


@contextmanager
def _blocks_lock(cache_dir: Optional[str], mode: int):
    """
    Hold an flock() lock on the cache's lock file for the duration of the `with` body.

    :param cache_dir: directory containing the lock file; DEFAULT_CACHE_DIR is used if None
    :param mode: flock operation passed to fcntl.flock (e.g. fcntl.LOCK_SH or fcntl.LOCK_EX)
    """
    base_dir = DEFAULT_CACHE_DIR if cache_dir is None else cache_dir
    lock_file = Path(base_dir) / BLOCKS_LOCK_FILE

    # The directory may not exist yet on the first run
    os.makedirs(lock_file.parent, exist_ok=True)

    with open(lock_file, "wb+") as handle:
        descriptor = handle.fileno()
        fcntl.flock(descriptor, mode)
        # No explicit unlock: the OS drops the lock once the file is closed or the process dies
        yield


def allow_cache_reads(cache_dir: Optional[str]):
    """
    Return a context manager holding a shared lock on the block cache.

    Multiple readers may hold it simultaneously; while it is held, blocks won't be removed along the way.
    """
    shared_mode = fcntl.LOCK_SH
    return _blocks_lock(cache_dir, shared_mode)


def allow_cache_writes(cache_dir: Optional[str]):
    """
    Return a context manager holding an exclusive lock on the block cache.

    Hold it while saving new blocks or removing old ones.
    """
    exclusive_mode = fcntl.LOCK_EX
    return _blocks_lock(cache_dir, exclusive_mode)


def free_disk_space_for(
    size: int,
    *,
    cache_dir: Optional[str],
    max_disk_space: Optional[int],
    os_quota: int = 1024**3,  # Minimal free space to leave so that the OS keeps functioning normally
):
    """
    Make sure at least `size` bytes can be written to the cache, removing least recently used files if needed.

    :param size: number of bytes that are about to be stored
    :param cache_dir: cache directory to scan and clean up; DEFAULT_CACHE_DIR is used if None
    :param max_disk_space: optional limit on the total cache size in bytes (compared to the scanned cache size)
    :param os_quota: number of bytes on the volume that must stay free and are not counted as available
    :raises RuntimeError: if the space is still insufficient after removing every cached file
    """
    if cache_dir is None:
        cache_dir = DEFAULT_CACHE_DIR
    cache_info = huggingface_hub.scan_cache_dir(cache_dir)

    available_space = shutil.disk_usage(cache_dir).free - os_quota
    if max_disk_space is not None:
        available_space = min(available_space, max_disk_space - cache_info.size_on_disk)

    gib = 1024**3
    logger.debug(f"Disk space: required {size / gib:.1f} GiB, available {available_space / gib:.1f} GiB")
    if size <= available_space:
        return

    cached_files = [file for repo in cache_info.repos for revision in repo.revisions for file in revision.files]

    # Several cached files (e.g. the same file in different revisions) may point to one blob.
    # Group them so that each blob is deleted and counted exactly once, together with all its links.
    files_by_blob = {}
    for file in cached_files:
        files_by_blob.setdefault(file.blob_path, []).append(file)

    # Remove as few least recently used blobs as possible
    removed_files = []
    freed_space = 0
    extra_space_needed = size - available_space
    for blob_files in sorted(files_by_blob.values(), key=lambda files: files[0].blob_last_accessed):
        blob_path = blob_files[0].blob_path
        for file in blob_files:
            if file.file_path != blob_path:  # The paths coincide if the cache entry is not a symlink
                os.remove(file.file_path)  # Remove symlink
        os.remove(blob_path)  # Remove contents

        removed_files.extend(blob_files)
        freed_space += blob_files[0].size_on_disk  # Links to the same blob share its size
        if freed_space >= extra_space_needed:
            break
    if removed_files:
        logger.info(f"Removed {len(removed_files)} files to free {freed_space / gib:.1f} GiB of disk space")
        logger.debug(f"Removed paths: {[str(file.file_path) for file in removed_files]}")

    if freed_space < extra_space_needed:
        raise RuntimeError(
            f"Insufficient disk space to load a block. Please free {(extra_space_needed - freed_space) / gib:.1f} GiB "
            f"on the volume for {cache_dir} or increase --max_disk_space if you set it manually"
        )
Clean up disk space (#152) 1 year ago			`import fcntl`
Use common folder for all caches, make it a volume in Dockerfile (#141) 1 year ago			`import os`
Clean up disk space (#152) 1 year ago			`import shutil`
			`from contextlib import contextmanager`
Use common folder for all caches, make it a volume in Dockerfile (#141) 1 year ago			`from pathlib import Path`
Clean up disk space (#152) 1 year ago			`from typing import Optional`

			`import huggingface_hub`
			`from hivemind.utils.logging import get_logger`

Use get_logger(__name__) instead of get_logger(__file__) (#265) 1 year ago			`logger = get_logger(__name__)`
Use common folder for all caches, make it a volume in Dockerfile (#141) 1 year ago
			`DEFAULT_CACHE_DIR = os.getenv("PETALS_CACHE", Path(Path.home(), ".cache", "petals"))`
Clean up disk space (#152) 1 year ago
			`BLOCKS_LOCK_FILE = "blocks.lock"`


			`@contextmanager`
			`def _blocks_lock(cache_dir: Optional[str], mode: int):`
			`if cache_dir is None:`
			`cache_dir = DEFAULT_CACHE_DIR`
			`lock_path = Path(cache_dir, BLOCKS_LOCK_FILE)`

			`os.makedirs(lock_path.parent, exist_ok=True)`
Fix file locks in NFS-mounted directories (#517) Fix #515. 8 months ago			`with open(lock_path, "wb+") as lock_fd:`
Clean up disk space (#152) 1 year ago			`fcntl.flock(lock_fd.fileno(), mode)`
			`# The OS will release the lock when lock_fd is closed or the process is killed`
			`yield`


			`def allow_cache_reads(cache_dir: Optional[str]):`
			`"""Allows simultaneous reads, guarantees that blocks won't be removed along the way (shared lock)"""`
			`return _blocks_lock(cache_dir, fcntl.LOCK_SH)`


Allow free_disk_space_for() remove arbitrary files from Petals cache (#339) Before this PR, `free_disk_space_for()` was able to remove (a) only entire cached revisions (= git commits/branches) and (b) only from the repository we're loading right now. This PR allows this functions to remove arbitrary files separately from any repositories. This is useful for transition to Petals 1.2.0+, since it now uses original repos instead of the ones with converted models (see #323). In particular, the cache for `bigscience/bloom-petals` is now deprecated and should be removed in favor of `bigscience/bloom`. This is also useful as a way to free space before loading LoRA adapters (#335). 11 months ago			`def allow_cache_writes(cache_dir: Optional[str]):`
Clean up disk space (#152) 1 year ago			`"""Allows saving new blocks and removing the old ones (exclusive lock)"""`
			`return _blocks_lock(cache_dir, fcntl.LOCK_EX)`


			`def free_disk_space_for(`
			`size: int,`
			`*,`
			`cache_dir: Optional[str],`
			`max_disk_space: Optional[int],`
			`os_quota: int = 1024**3, # Minimal space we should leave to keep OS function normally`
			`):`
			`if cache_dir is None:`
			`cache_dir = DEFAULT_CACHE_DIR`
			`cache_info = huggingface_hub.scan_cache_dir(cache_dir)`

			`available_space = shutil.disk_usage(cache_dir).free - os_quota`
			`if max_disk_space is not None:`
Allow free_disk_space_for() remove arbitrary files from Petals cache (#339) Before this PR, `free_disk_space_for()` was able to remove (a) only entire cached revisions (= git commits/branches) and (b) only from the repository we're loading right now. This PR allows this functions to remove arbitrary files separately from any repositories. This is useful for transition to Petals 1.2.0+, since it now uses original repos instead of the ones with converted models (see #323). In particular, the cache for `bigscience/bloom-petals` is now deprecated and should be removed in favor of `bigscience/bloom`. This is also useful as a way to free space before loading LoRA adapters (#335). 11 months ago			`available_space = min(available_space, max_disk_space - cache_info.size_on_disk)`
Add LLaMA support (#323) This PR: 1. Abolishes the model conversion procedure. Now, models are downloaded directly from original repositories like https://huggingface.co/bigscience/bloom. Servers download only shards with blocks to be hosted, and clients download only shards with input/output embeddings and layernorms. - BLOOM is loaded from `bigscience/bloom`, but we use the DHT prefix `bigscience/bloom-petals` for backward compatibility. Same with smaller BLOOMs and BLOOMZ. - LLaMA can be loaded from any repo like `username/llama-65b-hf`, but we use the DHT prefix `llama-65b-hf` (without the username) to accomodate blocks from different repos (there're a few of them with minor differences, such as `Llama` vs. `LLaMA` in the class name). 2. Refactors the client to generalize it for multiple models. Now, we have `petals.models` packages that contain model-specific code (e.g. `petals.models.bloom`, `petals.models.llama`). General code (e.g. CPU-efficient LM head, p-tuning) is kept in `petals.client`. 3. Introduces `WrappedLlamaBlock`, `DistributedLlamaConfig`, `DistributedLlamaForCausalLM`, `DistributedLlamaForSequenceClassification`, and `DistributedLlamaModel` compatible with Petals functionality (p-tuning, adapters, etc.). 4. Introduces `AutoDistributedConfig` that automatically chooses the correct config class (`DistributedLlamaConfig` or `DistributedBloomConfig`). The refactored configs contain all model-specific info for both clients and servers. Upgrade instructions: - Remove disk caches for blocks in old (converted) format to save disk space. That is, remove `~/.cache/petals/model--bigscience--bloom-petals` and `~/.cache/petals/model--bigscience--bloomz-petals` directories (if present). 11 months ago
			`gib = 1024**3`
			`logger.debug(f"Disk space: required {size / gib:.1f} GiB, available {available_space / gib:.1f} GiB")`
Clean up disk space (#152) 1 year ago			`if size <= available_space:`
			`return`

Allow free_disk_space_for() remove arbitrary files from Petals cache (#339) Before this PR, `free_disk_space_for()` was able to remove (a) only entire cached revisions (= git commits/branches) and (b) only from the repository we're loading right now. This PR allows this functions to remove arbitrary files separately from any repositories. This is useful for transition to Petals 1.2.0+, since it now uses original repos instead of the ones with converted models (see #323). In particular, the cache for `bigscience/bloom-petals` is now deprecated and should be removed in favor of `bigscience/bloom`. This is also useful as a way to free space before loading LoRA adapters (#335). 11 months ago			`cached_files = [file for repo in cache_info.repos for revision in repo.revisions for file in revision.files]`
Clean up disk space (#152) 1 year ago
Allow free_disk_space_for() remove arbitrary files from Petals cache (#339) Before this PR, `free_disk_space_for()` was able to remove (a) only entire cached revisions (= git commits/branches) and (b) only from the repository we're loading right now. This PR allows this functions to remove arbitrary files separately from any repositories. This is useful for transition to Petals 1.2.0+, since it now uses original repos instead of the ones with converted models (see #323). In particular, the cache for `bigscience/bloom-petals` is now deprecated and should be removed in favor of `bigscience/bloom`. This is also useful as a way to free space before loading LoRA adapters (#335). 11 months ago			`# Remove as few least recently used files as possible`
			`removed_files = []`
Clean up disk space (#152) 1 year ago			`freed_space = 0`
			`extra_space_needed = size - available_space`
Allow free_disk_space_for() remove arbitrary files from Petals cache (#339) Before this PR, `free_disk_space_for()` was able to remove (a) only entire cached revisions (= git commits/branches) and (b) only from the repository we're loading right now. This PR allows this functions to remove arbitrary files separately from any repositories. This is useful for transition to Petals 1.2.0+, since it now uses original repos instead of the ones with converted models (see #323). In particular, the cache for `bigscience/bloom-petals` is now deprecated and should be removed in favor of `bigscience/bloom`. This is also useful as a way to free space before loading LoRA adapters (#335). 11 months ago			`for file in sorted(cached_files, key=lambda file: file.blob_last_accessed):`
			`os.remove(file.file_path) # Remove symlink`
			`os.remove(file.blob_path) # Remove contents`

			`removed_files.append(file)`
			`freed_space += file.size_on_disk`
Clean up disk space (#152) 1 year ago			`if freed_space >= extra_space_needed:`
			`break`
Allow free_disk_space_for() remove arbitrary files from Petals cache (#339) Before this PR, `free_disk_space_for()` was able to remove (a) only entire cached revisions (= git commits/branches) and (b) only from the repository we're loading right now. This PR allows this functions to remove arbitrary files separately from any repositories. This is useful for transition to Petals 1.2.0+, since it now uses original repos instead of the ones with converted models (see #323). In particular, the cache for `bigscience/bloom-petals` is now deprecated and should be removed in favor of `bigscience/bloom`. This is also useful as a way to free space before loading LoRA adapters (#335). 11 months ago			`if removed_files:`
			`logger.info(f"Removed {len(removed_files)} files to free {freed_space / gib:.1f} GiB of disk space")`
			`logger.debug(f"Removed paths: {[str(file.file_path) for file in removed_files]}")`
Clean up disk space (#152) 1 year ago
			`if freed_space < extra_space_needed:`
			`raise RuntimeError(`
Fix GiBs in the "insufficient disk space" message (#187) 1 year ago			`f"Insufficient disk space to load a block. Please free {(extra_space_needed - freed_space) / gib:.1f} GiB "`
Clean up disk space (#152) 1 year ago			`f"on the volume for {cache_dir} or increase --max_disk_space if you set it manually"`
			`)`