Add RoPE Scaling params from llamacpp (#8422)

Description:
Just adding parameters from `llama-cpp-python` that support RoPE
scaling; a usage sketch follows the sources below.
@hwchase17, @baskaryan

sources:
- papers and explanation: https://kaiokendev.github.io/context
- llama.cpp discussion: https://github.com/ggerganov/llama.cpp/discussions/1965

Supports models like:
- https://huggingface.co/conceptofmind/LLongMA-2-13b
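For reference, a minimal usage sketch (not part of the diff). The model path is a placeholder, and `rope_freq_scale=0.5` is an assumed value for a model linearly interpolated from LLaMA-2's 4096-token base out to 8192 tokens (scale = original_ctx / extended_ctx):

```python
from langchain.llms import LlamaCpp

# Placeholder path and assumed scale: 4096 / 8192 = 0.5 for a model
# trained with linear position interpolation to an 8k context.
llm = LlamaCpp(
    model_path="/models/llongma-2-13b.ggmlv3.q4_0.bin",
    n_ctx=8192,              # extended context window
    rope_freq_base=10000.0,  # default RoPE base frequency
    rope_freq_scale=0.5,     # linear interpolation factor
)
print(llm("Summarize the linked RoPE-scaling write-up in one sentence."))
```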

@@ -100,6 +100,12 @@ class LlamaCpp(LLM):
use_mmap: Optional[bool] = True
"""Whether to keep the model loaded in RAM"""
rope_freq_scale: float = 1.0
"""Scale factor for rope sampling."""
rope_freq_base: float = 10000.0
"""Base frequency for rope sampling."""
streaming: bool = True
"""Whether to stream the results, token by token."""
@@ -111,6 +117,8 @@ class LlamaCpp(LLM):
"""Validate that llama-cpp-python library is installed."""
model_path = values["model_path"]
model_param_names = [
"rope_freq_scale",
"rope_freq_base",
"lora_path",
"lora_base",
"n_ctx",