diff --git a/.flake8 b/.flake8
index a6a72e7..9d8d9eb 100644
--- a/.flake8
+++ b/.flake8
@@ -7,5 +7,5 @@
 [flake8]
 exclude = .git
 max-line-length = 88
-ignore = E731, E402, W503, E203
+ignore = E731, E402, W503, E203, PAI100, PAI101, PAI201, PAI202, PAI203
 per-file-ignores = __init__.py:F401, version.py:D100
diff --git a/manifest/api/app.py b/manifest/api/app.py
index 670ba94..75ea2e5 100644
--- a/manifest/api/app.py
+++ b/manifest/api/app.py
@@ -20,6 +20,12 @@ PORT = int(os.environ.get("FLASK_PORT", 5000))
 MODEL_CONSTRUCTORS = {
     "huggingface": HuggingFaceModel,
 }
+try:
+    from manifest.api.models.zoo import ZooModel
+
+    MODEL_CONSTRUCTORS["zoo"] = ZooModel  # type: ignore
+except ImportError:
+    logger.warning("Zoo model not available.")
 
 
 def parse_args() -> argparse.Namespace:
@@ -31,14 +37,19 @@
         type=str,
         required=True,
         help="Model type used for finding constructor.",
-        choices=["huggingface"],
+        choices=["huggingface", "zoo"],
     )
     parser.add_argument(
-        "--model_name",
+        "--model_name_or_path",
         default=None,
         type=str,
-        required=True,
-        help="Name of model. Used in initialize of model class.",
+        help="Name of model or path to model. Used in initialize of model class.",
+    )
+    parser.add_argument(
+        "--model_config",
+        default=None,
+        type=str,
+        help="Model config. Used in initialize of model class.",
     )
     parser.add_argument(
         "--cache_dir", default=None, type=str, help="Cache directory for models."
     )
@@ -79,7 +90,10 @@ def main() -> None:
     """Run main."""
     kwargs = parse_args()
     model_type = kwargs.model_type
-    model_name = kwargs.model_name
+    model_name_or_path = kwargs.model_name_or_path
+    model_config = kwargs.model_config
+    if not model_name_or_path and not model_config:
+        raise ValueError("Must provide model_name_or_path or model_config.")
     use_accelerate = kwargs.use_accelerate_multigpu
     if use_accelerate:
         logger.info("Using accelerate. Overridding --device argument.")
@@ -91,7 +105,8 @@ def main() -> None:
     # Global model
     global model
     model = MODEL_CONSTRUCTORS[model_type](
-        model_name,
+        model_name_or_path,
+        model_config=model_config,
         cache_dir=kwargs.cache_dir,
         device=kwargs.device,
         use_accelerate=use_accelerate,
@@ -112,9 +127,10 @@ def completions() -> Dict:
     if not isinstance(prompt, str):
         raise ValueError("Prompt must be a str")
-    results = []
+    results_text = []
     for generations in model.generate(prompt, **generation_args):
-        results.append(generations)
+        results_text.append(generations)
+    results = [{"text": r, "text_logprob": None} for r in results_text]
     # transform the result into the openai format
     return OpenAIResponse(results).__dict__()
@@ -134,9 +150,10 @@ def choice_logits() -> Dict:
     if not isinstance(gold_choices, list):
         raise ValueError("Gold choices must be a list of string choices")
-    result = model.logits_scoring(prompt, gold_choices, **generation_args)
+    result, score = model.logits_scoring(prompt, gold_choices, **generation_args)
+    results = [{"text": result, "text_logprob": score}]
     # transform the result into the openai format
-    return OpenAIResponse([result]).__dict__()
+    return OpenAIResponse(results).__dict__()
 
 
 @app.route("/params", methods=["POST"])
diff --git a/manifest/api/models/huggingface.py b/manifest/api/models/huggingface.py
index 99fcd9b..7a17bdb 100644
--- a/manifest/api/models/huggingface.py
+++ b/manifest/api/models/huggingface.py
@@ -1,10 +1,11 @@
 """Huggingface model."""
 import json
 from pathlib import Path
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Tuple
 
 import torch
 from transformers import (
+    AutoModelForCausalLM,
     AutoModelForSeq2SeqLM,
     AutoTokenizer,
     BloomForCausalLM,
@@ -25,13 +26,18 @@ MODEL_REGISTRY = {
     "EleutherAI/gpt-neo-2.7B": GPTNeoForCausalLM,
     "EleutherAI/gpt-j-6B": GPTJForCausalLM,
     "EleutherAI/gpt-neox-20b": GPTNeoXForCausalLM,
+    "facebook/opt-125m": OPTForCausalLM,
     "facebook/opt-1.3b": OPTForCausalLM,
     "facebook/opt-2.7b": OPTForCausalLM,
     "facebook/opt-6.7b": OPTForCausalLM,
     "facebook/opt-13b": OPTForCausalLM,
     "facebook/opt-30b": OPTForCausalLM,
     "gpt2": GPT2LMHeadModel,
+    "bigscience/bloom-560m": BloomForCausalLM,
+    "bigscience/bloom-1b7": BloomForCausalLM,
+    "bigscience/bloom-3b": BloomForCausalLM,
     "bigscience/bloom-7b1": BloomForCausalLM,
+    "bigscience/bloom": AutoModelForCausalLM,
     "bigscience/T0pp": AutoModelForSeq2SeqLM,
     "bigscience/T0_3B": AutoModelForSeq2SeqLM,
     "google/t5-xl-lm-adapt": AutoModelForSeq2SeqLM,
@@ -117,7 +123,8 @@
 
     def __init__(
         self,
-        model_name: str,
+        model_name_or_path: str,
+        model_config: str,
         cache_dir: str,
         device: int,
         use_accelerate: bool,
@@ -131,7 +138,8 @@
         All arguments will be passed in the request from Manifest.
 
         Args:
-            model_name: model name string.
+            model_name_or_path: model name string.
+            model_config: model config string.
             cache_dir: cache directory for model.
             device: device to use for model.
             use_accelerate: whether to use accelerate for multi-gpu inference.
@@ -142,32 +150,43 @@
         if use_accelerate and use_parallelize:
             raise ValueError("Cannot use both accelerate and parallelize")
         # Check if providing path
-        self.model_path = model_name
+        self.model_path = model_name_or_path
         if Path(self.model_path).exists() and Path(self.model_path).is_dir():
             # Try to find config
             if (Path(self.model_path) / "config.json").exists():
                 config = json.load(open(Path(self.model_path) / "config.json"))
-                model_name = config["_name_or_path"]
-        self.model_name = model_name
+                model_name_or_path = config["_name_or_path"]
+        self.model_name = model_name_or_path
         print("Model Name:", self.model_name, "Model Path:", self.model_path)
         try:
-            tokenizer = AutoTokenizer.from_pretrained(model_name)
+            tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name, truncation_side="left"
+            )
         except ValueError:
-            tokenizer = AutoTokenizer.from_pretrained(model_name, use_fast=False)
-
+            tokenizer = AutoTokenizer.from_pretrained(
+                self.model_name, truncation_side="left", use_fast=False
+            )
         dtype = torch.float16 if use_fp16 else "auto"
-        try:
-            # Try to explicitely find a fp16 copy (gpt-j-6B for example)
-            model = MODEL_REGISTRY[model_name].from_pretrained(  # type: ignore
+        if self.model_name == "bigscience/bloom":
+            model = MODEL_REGISTRY[self.model_name].from_pretrained(  # type: ignore
                 self.model_path,
                 cache_dir=cache_dir,
-                revision="float16",
-                torch_dtype=torch.float16,
-            )
-        except Exception:
-            model = MODEL_REGISTRY[model_name].from_pretrained(  # type: ignore
-                self.model_path, cache_dir=cache_dir, torch_dtype=dtype
+                load_in_8bit=True,
+                device_map="auto",
             )
+        else:
+            try:
+                # Try to explicitely find a fp16 copy (gpt-j-6B for example)
+                model = MODEL_REGISTRY[self.model_name].from_pretrained(  # type: ignore
+                    self.model_path,
+                    cache_dir=cache_dir,
+                    revision="float16",
+                    torch_dtype=torch.float16,
+                )
+            except Exception:
+                model = MODEL_REGISTRY[self.model_name].from_pretrained(  # type: ignore
+                    self.model_path, cache_dir=cache_dir, torch_dtype=dtype
+                )
         model.eval()
 
         print(f"Loaded Model DType {model.dtype}")
@@ -175,20 +194,21 @@
         if not self.is_encdec:
             tokenizer.pad_token = tokenizer.eos_token
 
-        if use_accelerate:
-            self._dispatch_accelerate_model(model, perc_max_gpu_mem_red)
-            device = 0
-        elif use_parallelize:
-            model.parallelize()
-            device = 0
-        else:
-            if device > -1:
-                torch_device = (
-                    torch.device("cpu")
-                    if (device == -1 or not torch.cuda.is_available())
-                    else torch.device(f"cuda:{device}")
-                )
-                model = model.to(torch_device)  # type: ignore
+        if self.model_name != "bigscience/bloom":
+            if use_accelerate:
+                self._dispatch_accelerate_model(model, perc_max_gpu_mem_red)
+                device = 0
+            elif use_parallelize:
+                model.parallelize()
+                device = 0
+            else:
+                if device > -1:
+                    torch_device = (
+                        torch.device("cpu")
+                        if (device == -1 or not torch.cuda.is_available())
+                        else torch.device(f"cuda:{device}")
+                    )
+                    model = model.to(torch_device)  # type: ignore
         self.pipeline = Pipeline(  # type: ignore
             model=model, tokenizer=tokenizer, device=device
         )
@@ -258,6 +278,7 @@
         dispatch_model(model, device_map=device_map)
         return
 
+    @torch.no_grad()
     def generate(self, prompt: str, **kwargs: Any) -> List[str]:
         """
         Generate the prompt from model.
@@ -303,9 +324,10 @@
             final_results = [r["generated_text"][start_idx:] for r in result]
         return final_results
 
+    @torch.no_grad()
     def logits_scoring(
         self, prompt: str, gold_choices: List[str], **kwargs: Any
-    ) -> str:
+    ) -> Tuple[str, float]:
         """
         Given the prompt and gold choices, choose the best choice with max logits.
 
@@ -461,4 +483,4 @@
         if not self.is_encdec:
             seq_log_prob = seq_log_prob * (1 / (seq_token_log_probs != 0).sum(dim=-1))
         prediction = seq_log_prob.argmax(dim=-1).item()
-        return gold_choices[int(prediction)]
+        return gold_choices[int(prediction)], seq_log_prob[int(prediction)].item()
diff --git a/manifest/api/models/model.py b/manifest/api/models/model.py
index 7723a85..f532c7e 100644
--- a/manifest/api/models/model.py
+++ b/manifest/api/models/model.py
@@ -1,20 +1,37 @@
 """Model class."""
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List
+from typing import Any, Dict, List, Tuple
 
 
 class Model(ABC):
     """Model class."""
 
     @abstractmethod
-    def __init__(self, model_name: str, **kwargs: Any):
+    def __init__(
+        self,
+        model_name_or_path: str,
+        model_config: str,
+        cache_dir: str,
+        device: int,
+        use_accelerate: bool,
+        use_parallelize: bool,
+        perc_max_gpu_mem_red: float,
+        use_fp16: bool,
+    ):
         """
         Initialize model.
 
-        kwargs are passed to model as default parameters.
+        All arguments will be passed in the request from Manifest.
 
         Args:
-            model_name: model name string.
+            model_name_or_path: model name string.
+            model_config: model config string.
+            cache_dir: cache directory for model.
+            device: device to use for model.
+            use_accelerate: whether to use accelerate for multi-gpu inference.
+            use_parallelize: use HF default parallelize
+            perc_max_gpu_mem_red: percent max memory reduction in accelerate
+            use_fp16: use fp16 for model weights.
         """
         raise NotImplementedError()
 
@@ -37,3 +54,19 @@
             list of generated text (list of length 1 for 1 generation).
         """
         raise NotImplementedError()
+
+    @abstractmethod
+    def logits_scoring(
+        self, prompt: str, gold_choices: List[str], **kwargs: Any
+    ) -> Tuple[str, float]:
+        """
+        Given the prompt and gold choices, choose the best choice with max logits.
+
+        Args:
+            prompt: promt to generate from.
+            gold_choices: list of choices to choose from.
+
+        Returns:
+            the returned gold choice and the score.
+        """
+        raise NotImplementedError()
diff --git a/manifest/api/models/zoo.py b/manifest/api/models/zoo.py
new file mode 100644
index 0000000..667a11e
--- /dev/null
+++ b/manifest/api/models/zoo.py
@@ -0,0 +1,94 @@
+"""Zoo model."""
+import os
+import sys
+from typing import Any, Dict, List, Tuple
+
+from manifest.api.models.model import Model
+
+ZOO_PATH = os.environ.get("ZOO_PATH", None)
+if not ZOO_PATH:
+    raise ImportError("ZOO_PATH environment variable not set.")
+sys.path.append(ZOO_PATH)
+
+from src.models.s4_seq import S4LMManifest  # type: ignore
+
+
+class ZooModel(Model):
+    """Zoo model."""
+
+    def __init__(
+        self,
+        model_name_or_path: str,
+        model_config: str,
+        cache_dir: str,
+        device: int,
+        use_accelerate: bool,
+        use_parallelize: bool,
+        perc_max_gpu_mem_red: float,
+        use_fp16: bool,
+    ):
+        """
+        Initialize model.
+
+        All arguments will be passed in the request from Manifest.
+
+        Args:
+            model_name_or_path: model name string.
+            model_config: model config path.
+            cache_dir: cache directory for model.
+            device: device to use for model.
+            use_accelerate: whether to use accelerate for multi-gpu inference.
+            use_parallelize: use HF default parallelize
+            perc_max_gpu_mem_red: percent max memory reduction in accelerate
+            use_fp16: use fp16 for model weights.
+        """
+        # Check if providing path
+        self.model_path = model_name_or_path
+        self.model_config = model_config
+        if not self.model_config:
+            raise ValueError("Must provide model config.")
+        self.model = S4LMManifest(
+            config_path=self.model_config,
+            weights_path=self.model_path,
+        )
+        # Can only load this after the model has been initialized
+        self.model_name = self.model.get_model_name()
+
+    def get_init_params(self) -> Dict:
+        """Return init params to determine what model is being used."""
+        return {
+            "model_name": self.model_name,
+            "model_path": self.model_path,
+            "model_config": self.model_config,
+        }
+
+    def generate(self, prompt: str, **kwargs: Any) -> List[str]:
+        """
+        Generate the prompt from model.
+
+        Outputs must be generated text, not including prompt.
+
+        Args:
+            prompt: promt to generate from.
+
+        Returns:
+            list of generated text (list of length 1 for 1 generation).
+        """
+        print(prompt)
+        final_results = self.model.generate(prompt, **kwargs)
+        return final_results
+
+    def logits_scoring(
+        self, prompt: str, gold_choices: List[str], **kwargs: Any
+    ) -> Tuple[str, float]:
+        """
+        Given the prompt and gold choices, choose the best choice with max logits.
+
+        Args:
+            prompt: promt to generate from.
+            gold_choices: list of choices to choose from.
+
+        Returns:
+            the returned gold choice and the score
+        """
+        raise NotImplementedError()
diff --git a/manifest/api/response.py b/manifest/api/response.py
index 41cf9da..80005e7 100644
--- a/manifest/api/response.py
+++ b/manifest/api/response.py
@@ -2,13 +2,13 @@
 
 import time
 import uuid
-from typing import Any, Dict
+from typing import Any, Dict, List
 
 
 class OpenAIResponse:
     """OpenAI response."""
 
-    def __init__(self, results: list) -> None:
+    def __init__(self, results: List[Dict[str, Any]]) -> None:
         """Initialize response."""
         self.results = results
         self.response_id = str(uuid.uuid4())
@@ -23,7 +23,8 @@
             "model": "flask_model",
             "choices": [
                 {
-                    "text": result,
+                    "text": result["text"],
+                    "text_logprob": result["text_logprob"],
                     # TODO: Add in more metadata for HF models
                     # "logprobs": {
                     #     "tokens": result["tokens"],
diff --git a/manifest/clients/ai21.py b/manifest/clients/ai21.py
index ec11479..c1a1cf1 100644
--- a/manifest/clients/ai21.py
+++ b/manifest/clients/ai21.py
@@ -144,3 +144,19 @@
             return self.format_response(res.json())
 
         return _run_completion, request_params
+
+    def get_choice_logit_request(
+        self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
+    ) -> Tuple[Callable[[], Dict], Dict]:
+        """
+        Get request string function for choosing max choices.
+
+        Args:
+            query: query string.
+            gold_choices: choices for model to choose from via max logits.
+
+        Returns:
+            request function that takes no input.
+            request parameters as dict.
+        """
+        raise NotImplementedError("AI21 does not support choice logit request.")
diff --git a/manifest/clients/client.py b/manifest/clients/client.py
index 308205a..2ae3278 100644
--- a/manifest/clients/client.py
+++ b/manifest/clients/client.py
@@ -81,3 +81,20 @@
             request parameters as dict.
         """
         raise NotImplementedError()
+
+    @abstractmethod
+    def get_choice_logit_request(
+        self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
+    ) -> Tuple[Callable[[], Dict], Dict]:
+        """
+        Get request string function for choosing max choices.
+
+        Args:
+            query: query string.
+            gold_choices: choices for model to choose from via max logits.
+
+        Returns:
+            request function that takes no input.
+            request parameters as dict.
+        """
+        raise NotImplementedError()
diff --git a/manifest/clients/crfm.py b/manifest/clients/crfm.py
index a9a50e0..40b1102 100644
--- a/manifest/clients/crfm.py
+++ b/manifest/clients/crfm.py
@@ -149,3 +149,19 @@
             return self.format_response(request_result)
 
         return _run_completion, request_params
+
+    def get_choice_logit_request(
+        self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
+    ) -> Tuple[Callable[[], Dict], Dict]:
+        """
+        Get request string function for choosing max choices.
+
+        Args:
+            query: query string.
+            gold_choices: choices for model to choose from via max logits.
+
+        Returns:
+            request function that takes no input.
+            request parameters as dict.
+        """
+        raise NotImplementedError("CRFM does not support choice logit request.")
diff --git a/manifest/clients/openai.py b/manifest/clients/openai.py
index e6d5f6e..cd6bf7b 100644
--- a/manifest/clients/openai.py
+++ b/manifest/clients/openai.py
@@ -12,6 +12,8 @@ logger = logging.getLogger(__name__)
 
 OPENAI_ENGINES = {
     "text-davinci-002",
+    "text-davinci-001",
+    "davinci",
     "text-curie-001",
     "text-babbage-001",
     "text-ada-001",
@@ -116,3 +118,19 @@
                 raise e
 
         return _run_completion, request_params
+
+    def get_choice_logit_request(
+        self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
+    ) -> Tuple[Callable[[], Dict], Dict]:
+        """
+        Get request string function for choosing max choices.
+
+        Args:
+            query: query string.
+            gold_choices: choices for model to choose from via max logits.
+
+        Returns:
+            request function that takes no input.
+            request parameters as dict.
+        """
+        raise NotImplementedError("OpenAI does not support choice logit request.")
diff --git a/manifest/clients/opt.py b/manifest/clients/opt.py
index 1b0a985..c15a303 100644
--- a/manifest/clients/opt.py
+++ b/manifest/clients/opt.py
@@ -86,3 +86,19 @@
             return res.json()
 
         return _run_completion, request_params
+
+    def get_choice_logit_request(
+        self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
+    ) -> Tuple[Callable[[], Dict], Dict]:
+        """
+        Get request string function for choosing max choices.
+
+        Args:
+            query: query string.
+            gold_choices: choices for model to choose from via max logits.
+
+        Returns:
+            request function that takes no input.
+            request parameters as dict.
+        """
+        raise NotImplementedError("OPT does not support choice logit request.")
diff --git a/manifest/clients/zoo.py b/manifest/clients/zoo.py
new file mode 100644
index 0000000..62266af
--- /dev/null
+++ b/manifest/clients/zoo.py
@@ -0,0 +1,102 @@
+"""Zoo client."""
+import logging
+from typing import Any, Callable, Dict, List, Optional, Tuple
+
+import requests
+
+from manifest.clients.client import Client
+
+logger = logging.getLogger(__name__)
+
+# User param -> (client param, default value)
+ZOO_PARAMS: Dict[str, Tuple[str, str]] = {}
+
+
+class ZooClient(Client):
+    """Zoo client."""
+
+    def connect(
+        self,
+        connection_str: Optional[str] = None,
+        client_args: Dict[str, Any] = {},
+    ) -> None:
+        """
+        Connect to the model.
+
+        Args:
+            connection_str: connection string.
+            client_args: client arguments.
+        """
+        self.host = connection_str.rstrip("/")
+        for key in ZOO_PARAMS:
+            setattr(self, key, client_args.pop(key, ZOO_PARAMS[key][1]))
+        self.model_params = self.get_model_params()
+
+    def close(self) -> None:
+        """Close the client."""
+        pass
+
+    def get_model_params(self) -> Dict:
+        """
+        Get model params.
+
+        By getting model params from the server, we can add to request
+        and make sure cache keys are unique to model.
+
+        Returns:
+            model params.
+        """
+        res = requests.post(self.host + "/params")
+        return res.json()
+
+    def get_model_inputs(self) -> List:
+        """
+        Get allowable model inputs.
+
+        Returns:
+            model inputs.
+        """
+        return list(ZOO_PARAMS.keys())
+
+    def get_request(
+        self, query: str, request_args: Dict[str, Any] = {}
+    ) -> Tuple[Callable[[], Dict], Dict]:
+        """
+        Get request string function.
+
+        Args:
+            query: query string.
+
+        Returns:
+            request function that takes no input.
+            request parameters as dict.
+        """
+        request_params = {"prompt": query}
+        # Zoo is greedy and takes all params
+        # TODO: Once zoo is finalized, fix this
+        for key in list(request_args.keys()):
+            request_params[key] = request_args.pop(key, None)
+        request_params.update(self.model_params)
+
+        def _run_completion() -> Dict:
+            post_str = self.host + "/completions"
+            res = requests.post(post_str, json=request_params)
+            return res.json()
+
+        return _run_completion, request_params
+
+    def get_choice_logit_request(
+        self, query: str, gold_choices: List[str], request_args: Dict[str, Any] = {}
+    ) -> Tuple[Callable[[], Dict], Dict]:
+        """
+        Get request string function for choosing max choices.
+
+        Args:
+            query: query string.
+            gold_choices: choices for model to choose from via max logits.
+
+        Returns:
+            request function that takes no input.
+            request parameters as dict.
+        """
+        raise NotImplementedError("Zoo does not support choice logit request.")
diff --git a/manifest/manifest.py b/manifest/manifest.py
index 699ce1f..b7506dc 100644
--- a/manifest/manifest.py
+++ b/manifest/manifest.py
@@ -12,6 +12,7 @@ from manifest.clients.dummy import DummyClient
 from manifest.clients.huggingface import HuggingFaceClient
 from manifest.clients.openai import OpenAIClient
 from manifest.clients.opt import OPTClient
+from manifest.clients.zoo import ZooClient
 from manifest.prompt import Prompt
 from manifest.response import Response
 from manifest.session import Session
@@ -25,6 +26,7 @@ CLIENT_CONSTRUCTORS = {
     "huggingface": HuggingFaceClient,
     "opt": OPTClient,
     "dummy": DummyClient,
+    "zoo": ZooClient,
 }
 
 CACHE_CONSTRUCTORS = {
@@ -83,12 +85,12 @@
         )
         self.client_name = client_name
         # Must pass kwargs as dict for client "pop" methods removed used arguments
-        self.client = CLIENT_CONSTRUCTORS[client_name](  # type: ignore
-            client_connection, client_args=kwargs
-        )
         self.cache = CACHE_CONSTRUCTORS[cache_name](  # type: ignore
             cache_connection, cache_args=kwargs
         )
+        self.client = CLIENT_CONSTRUCTORS[client_name](  # type: ignore
+            client_connection, client_args=kwargs
+        )
         self.session = Session(session_id)
         if len(kwargs) > 0:
             raise ValueError(f"{list(kwargs.items())} arguments are not recognized.")
diff --git a/setup.py b/setup.py
index ed864ac..4769a58 100644
--- a/setup.py
+++ b/setup.py
@@ -44,6 +44,7 @@ REQUIRED = [
 
 # What packages are optional?
 EXTRAS = {
     "dev": [
+        "autopep8>=1.6.0",
        "black>=22.3.0",
        "isort>=5.9.3",
        "flake8>=4.0.0",