Add RoPE Scaling params from llamacpp (#8422)

Description:
Just adding parameters from `llama-cpp-python` that support RoPE
scaling; a usage sketch follows the sources below.
@hwchase17, @baskaryan

sources:
- papers and explanation: https://kaiokendev.github.io/context
- llama.cpp discussion: https://github.com/ggerganov/llama.cpp/discussions/1965

Supports models like:
- https://huggingface.co/conceptofmind/LLongMA-2-13b
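For reference, a minimal usage sketch (not part of the diff). The model path is a placeholder, and `rope_freq_scale=0.5` is an assumed value for a model linearly interpolated from LLaMA-2's 4096-token base out to 8192 tokens (scale = original_ctx / extended_ctx):

```python
from langchain.llms import LlamaCpp

# Placeholder path and assumed scale: 4096 / 8192 = 0.5 for a model
# trained with linear position interpolation to an 8k context.
llm = LlamaCpp(
    model_path="/models/llongma-2-13b.ggmlv3.q4_0.bin",
    n_ctx=8192,              # extended context window
    rope_freq_base=10000.0,  # default RoPE base frequency
    rope_freq_scale=0.5,     # linear interpolation factor
)
print(llm("Summarize the linked RoPE-scaling write-up in one sentence."))
```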

@@ -100,6 +100,12 @@ class LlamaCpp(LLM):
use_mmap: Optional[bool] = True
"""Whether to keep the model loaded in RAM"""
rope_freq_scale: float = 1.0
"""Scale factor for rope sampling."""
rope_freq_base: float = 10000.0
"""Base frequency for rope sampling."""
streaming: bool = True
"""Whether to stream the results, token by token."""
@@ -111,6 +117,8 @@ class LlamaCpp(LLM):
"""Validate that llama-cpp-python library is installed."""
model_path = values["model_path"]
model_param_names = [
"rope_freq_scale",
"rope_freq_base",
"lora_path",
"lora_base",
"n_ctx",