|
|
|
@ -1,15 +1,16 @@
|
|
|
|
|
"""Flask app."""
|
|
|
|
|
import argparse
|
|
|
|
|
import json
|
|
|
|
|
import logging
|
|
|
|
|
import os
|
|
|
|
|
import socket
|
|
|
|
|
from typing import Dict
|
|
|
|
|
|
|
|
|
|
import pkg_resources
|
|
|
|
|
from flask import Flask, request
|
|
|
|
|
from flask import Flask, Response, request
|
|
|
|
|
|
|
|
|
|
from manifest.api.models.huggingface import HuggingFaceModel
|
|
|
|
|
from manifest.api.response import Response
|
|
|
|
|
from manifest.api.response import ModelResponse
|
|
|
|
|
|
|
|
|
|
os.environ["TOKENIZERS_PARALLELISM"] = "false"
|
|
|
|
|
|
|
|
|
@ -40,12 +41,6 @@ def parse_args() -> argparse.Namespace:
|
|
|
|
|
type=str,
|
|
|
|
|
help="Name of model or path to model. Used in initialize of model class.",
|
|
|
|
|
)
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--model_config",
|
|
|
|
|
default=None,
|
|
|
|
|
type=str,
|
|
|
|
|
help="Model config. Used in initialize of model class.",
|
|
|
|
|
)
|
|
|
|
|
parser.add_argument(
|
|
|
|
|
"--cache_dir", default=None, type=str, help="Cache directory for models."
|
|
|
|
|
)
|
|
|
|
@ -105,9 +100,8 @@ def main() -> None:
|
|
|
|
|
|
|
|
|
|
model_type = kwargs.model_type
|
|
|
|
|
model_name_or_path = kwargs.model_name_or_path
|
|
|
|
|
model_config = kwargs.model_config
|
|
|
|
|
if not model_name_or_path and not model_config:
|
|
|
|
|
raise ValueError("Must provide model_name_or_path or model_config.")
|
|
|
|
|
if not model_name_or_path:
|
|
|
|
|
raise ValueError("Must provide model_name_or_path.")
|
|
|
|
|
if kwargs.use_accelerate_multigpu:
|
|
|
|
|
logger.info("Using accelerate. Overridding --device argument.")
|
|
|
|
|
if (
|
|
|
|
@ -134,7 +128,6 @@ def main() -> None:
|
|
|
|
|
global model
|
|
|
|
|
model = MODEL_CONSTRUCTORS[model_type](
|
|
|
|
|
model_name_or_path,
|
|
|
|
|
model_config=model_config,
|
|
|
|
|
cache_dir=kwargs.cache_dir,
|
|
|
|
|
device=kwargs.device,
|
|
|
|
|
use_accelerate=kwargs.use_accelerate_multigpu,
|
|
|
|
@ -148,7 +141,7 @@ def main() -> None:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/completions", methods=["POST"])
|
|
|
|
|
def completions() -> Dict:
|
|
|
|
|
def completions() -> Response:
|
|
|
|
|
"""Get completions for generation."""
|
|
|
|
|
prompt = request.json["prompt"]
|
|
|
|
|
del request.json["prompt"]
|
|
|
|
@ -156,17 +149,28 @@ def completions() -> Dict:
|
|
|
|
|
|
|
|
|
|
if not isinstance(prompt, (str, list)):
|
|
|
|
|
raise ValueError("Prompt must be a str or list of str")
|
|
|
|
|
|
|
|
|
|
results_text = []
|
|
|
|
|
for generations in model.generate(prompt, **generation_args):
|
|
|
|
|
results_text.append(generations)
|
|
|
|
|
results = [{"text": r[0], "text_logprob": r[1]} for r in results_text]
|
|
|
|
|
# transform the result into the openai format
|
|
|
|
|
return Response(results, response_type="text_completion").__dict__()
|
|
|
|
|
try:
|
|
|
|
|
results_text = []
|
|
|
|
|
for generations in model.generate(prompt, **generation_args):
|
|
|
|
|
results_text.append(generations)
|
|
|
|
|
results = [{"text": r[0], "text_logprob": r[1]} for r in results_text]
|
|
|
|
|
# transform the result into the openai format
|
|
|
|
|
return Response(
|
|
|
|
|
json.dumps(
|
|
|
|
|
ModelResponse(results, response_type="text_completion").__dict__()
|
|
|
|
|
),
|
|
|
|
|
status=200,
|
|
|
|
|
)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.error(e)
|
|
|
|
|
return Response(
|
|
|
|
|
json.dumps({"message": str(e)}),
|
|
|
|
|
status=400,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/choice_logits", methods=["POST"])
|
|
|
|
|
def choice_logits() -> Dict:
|
|
|
|
|
def choice_logits() -> Response:
|
|
|
|
|
"""Get maximal likely choice via max logits after generation."""
|
|
|
|
|
prompt = request.json["prompt"]
|
|
|
|
|
del request.json["prompt"]
|
|
|
|
@ -179,11 +183,24 @@ def choice_logits() -> Dict:
|
|
|
|
|
|
|
|
|
|
if not isinstance(gold_choices, list):
|
|
|
|
|
raise ValueError("Gold choices must be a list of string choices")
|
|
|
|
|
|
|
|
|
|
choice_score_list = model.logits_scoring(prompt, gold_choices, **generation_args)
|
|
|
|
|
results = [{"text": r[0], "text_logprob": r[1]} for r in choice_score_list]
|
|
|
|
|
# transform the result into the openai format
|
|
|
|
|
return Response(results, response_type="choice_selection").__dict__()
|
|
|
|
|
try:
|
|
|
|
|
choice_score_list = model.logits_scoring(
|
|
|
|
|
prompt, gold_choices, **generation_args
|
|
|
|
|
)
|
|
|
|
|
results = [{"text": r[0], "text_logprob": r[1]} for r in choice_score_list]
|
|
|
|
|
# transform the result into the openai format
|
|
|
|
|
return Response(
|
|
|
|
|
json.dumps(
|
|
|
|
|
ModelResponse(results, response_type="choice_selection").__dict__()
|
|
|
|
|
),
|
|
|
|
|
status=200,
|
|
|
|
|
)
|
|
|
|
|
except Exception as e:
|
|
|
|
|
logger.error(e)
|
|
|
|
|
return Response(
|
|
|
|
|
json.dumps({"message": str(e)}),
|
|
|
|
|
status=400,
|
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.route("/params", methods=["POST"])
|
|
|
|
|