You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

84 lines
2.9 KiB

import fcntl
import os
import shutil
from contextlib import contextmanager
from pathlib import Path
from typing import Optional
import huggingface_hub
from hivemind.utils.logging import get_logger
# Module-level logger, named after this module
logger = get_logger(__name__)
# Cache root: the PETALS_CACHE environment variable when set (a str),
# otherwise ~/.cache/petals (a Path)
DEFAULT_CACHE_DIR = os.environ.get("PETALS_CACHE", Path.home() / ".cache" / "petals")

# File name (inside the cache directory) that the block locks are taken on
BLOCKS_LOCK_FILE = "blocks.lock"
@contextmanager
def _blocks_lock(cache_dir: Optional[str], mode: int):
    """Hold an advisory ``flock`` on the cache's lock file for the duration of a ``with`` body.

    Args:
        cache_dir: Directory that contains (or will contain) the lock file.
            ``None`` selects ``DEFAULT_CACHE_DIR``.
        mode: Operation passed to ``fcntl.flock``, e.g. ``fcntl.LOCK_SH`` or ``fcntl.LOCK_EX``.

    Yields:
        None. The lock stays held until the ``with`` block is left.
    """
    if cache_dir is None:
        cache_dir = DEFAULT_CACHE_DIR
    lock_path = Path(cache_dir, BLOCKS_LOCK_FILE)

    # The cache directory may not exist yet on first use
    os.makedirs(lock_path.parent, exist_ok=True)
    with open(lock_path, "wb") as lock_fd:
        fcntl.flock(lock_fd.fileno(), mode)
        # The OS will release the lock when lock_fd is closed or the process is killed
        yield
def allow_cache_reads(cache_dir: Optional[str]):
    """
    Request the blocks lock in shared mode (``fcntl.LOCK_SH``): several readers may hold it
    simultaneously, with the guarantee that blocks won't be removed along the way.
    """
    shared_mode = fcntl.LOCK_SH
    return _blocks_lock(cache_dir, shared_mode)
def allow_cache_writes(cache_dir: Optional[str]):
    """
    Request the blocks lock in exclusive mode (``fcntl.LOCK_EX``), which allows saving
    new blocks and removing the old ones.
    """
    exclusive_mode = fcntl.LOCK_EX
    return _blocks_lock(cache_dir, exclusive_mode)
def free_disk_space_for(
size: int,
cache_dir: Optional[str],
max_disk_space: Optional[int],
os_quota: int = 1024**3, # Minimal space we should leave to keep OS function normally
if cache_dir is None:
cache_info = huggingface_hub.scan_cache_dir(cache_dir)
available_space = shutil.disk_usage(cache_dir).free - os_quota
if max_disk_space is not None:
available_space = min(available_space, max_disk_space - cache_info.size_on_disk)
Add LLaMA support (#323) This PR: 1. **Abolishes the model conversion procedure.** Now, models are downloaded directly from original repositories like `bigscience/bloom`. Servers download only shards with blocks to be hosted, and clients download only shards with input/output embeddings and layernorms. - BLOOM is loaded from `bigscience/bloom`, but we use the DHT prefix `bigscience/bloom-petals` for backward compatibility. Same with smaller BLOOMs and BLOOMZ. - LLaMA can be loaded from any repo like `username/llama-65b-hf`, but we use the DHT prefix `llama-65b-hf` (without the username) to accommodate blocks from different repos (there are a few of them with minor differences, such as `Llama` vs. `LLaMA` in the class name). 2. **Refactors the client to generalize it for multiple models.** Now, we have `petals.models` packages that contain model-specific code (e.g. `petals.models.bloom`, `petals.models.llama`). General code (e.g. CPU-efficient LM head, p-tuning) is kept in `petals.client`. 3. **Introduces** `WrappedLlamaBlock`, `DistributedLlamaConfig`, `DistributedLlamaForCausalLM`, `DistributedLlamaForSequenceClassification`, and `DistributedLlamaModel` compatible with Petals functionality (p-tuning, adapters, etc.). 4. **Introduces** `AutoDistributedConfig` that automatically chooses the correct config class (`DistributedLlamaConfig` or `DistributedBloomConfig`). The refactored configs contain all model-specific info for both clients and servers. Upgrade instructions: - Remove disk caches for blocks in old (converted) format to save disk space. That is, remove `~/.cache/petals/model--bigscience--bloom-petals` and `~/.cache/petals/model--bigscience--bloomz-petals` directories (if present).
12 months ago
gib = 1024**3
logger.debug(f"Disk space: required {size / gib:.1f} GiB, available {available_space / gib:.1f} GiB")
if size <= available_space:
cached_files = [file for repo in cache_info.repos for revision in repo.revisions for file in revision.files]
# Remove as few least recently used files as possible
removed_files = []
freed_space = 0
extra_space_needed = size - available_space
for file in sorted(cached_files, key=lambda file: file.blob_last_accessed):
os.remove(file.file_path) # Remove symlink
os.remove(file.blob_path) # Remove contents
freed_space += file.size_on_disk
if freed_space >= extra_space_needed:
if removed_files:"Removed {len(removed_files)} files to free {freed_space / gib:.1f} GiB of disk space")
logger.debug(f"Removed paths: {[str(file.file_path) for file in removed_files]}")
if freed_space < extra_space_needed:
raise RuntimeError(
f"Insufficient disk space to load a block. Please free {(extra_space_needed - freed_space) / gib:.1f} GiB "
f"on the volume for {cache_dir} or increase --max_disk_space if you set it manually"