|
|
|
@ -25,6 +25,11 @@ def main():
|
|
|
|
|
help="path or name of a pretrained model, converted with cli/convert_model.py")
|
|
|
|
|
group.add_argument('model', nargs='?', type=str, help="same as --converted_model_name_or_path")
|
|
|
|
|
|
|
|
|
|
group = parser.add_mutually_exclusive_group(required=False)
|
|
|
|
|
group.add_argument("--token", type=str, default=None, help="Hugging Face hub auth token for .from_pretrained()")
|
|
|
|
|
group.add_argument("--use_auth_token", action="store_true", dest="token",
|
|
|
|
|
help="Read token saved by `huggingface-cli login")
|
|
|
|
|
|
|
|
|
|
parser.add_argument('--num_blocks', type=int, default=None, help="The number of blocks to serve")
|
|
|
|
|
parser.add_argument('--block_indices', type=str, default=None, help="Specific block indices to serve")
|
|
|
|
|
parser.add_argument('--dht_prefix', type=str, default=None, help="Announce all blocks with this DHT prefix")
|
|
|
|
@ -132,7 +137,6 @@ def main():
|
|
|
|
|
parser.add_argument("--mean_balance_check_period", type=float, default=60,
|
|
|
|
|
help="Check the swarm's balance every N seconds (and rebalance it if necessary)")
|
|
|
|
|
|
|
|
|
|
parser.add_argument("--token", action='store_true', help="Hugging Face hub auth token for .from_pretrained()")
|
|
|
|
|
parser.add_argument('--quant_type', type=str, default=None, choices=[choice.name.lower() for choice in QuantType],
|
|
|
|
|
help="Quantize blocks to 8-bit (int8 from the LLM.int8() paper) or "
|
|
|
|
|
"4-bit (nf4 from the QLoRA paper) formats to save GPU memory. "
|
|
|
|
|