diff --git a/langchain/llms/llamacpp.py b/langchain/llms/llamacpp.py
index 0c83c7635d..a42c2b9263 100644
--- a/langchain/llms/llamacpp.py
+++ b/langchain/llms/llamacpp.py
@@ -31,13 +31,13 @@ class LlamaCpp(LLM):
     """Token context window."""
 
     n_parts: int = Field(-1, alias="n_parts")
-    """Number of parts to split the model into. 
+    """Number of parts to split the model into.
     If -1, the number of parts is automatically determined."""
 
     seed: int = Field(-1, alias="seed")
     """Seed. If -1, a random seed is used."""
 
-    f16_kv: bool = Field(False, alias="f16_kv")
+    f16_kv: bool = Field(True, alias="f16_kv")
     """Use half-precision for key/value cache."""
 
     logits_all: bool = Field(False, alias="logits_all")
@@ -50,7 +50,7 @@ class LlamaCpp(LLM):
     """Force system to keep model in RAM."""
 
     n_threads: Optional[int] = Field(None, alias="n_threads")
-    """Number of threads to use. 
+    """Number of threads to use.
     If None, the number of threads is automatically determined."""
 
     n_batch: Optional[int] = Field(8, alias="n_batch")
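For context, a minimal usage sketch (not part of the patch) showing how the touched fields surface on `LlamaCpp`; the model path is a placeholder, and after this change `f16_kv` defaults to `True` unless explicitly overridden:

```python
from langchain.llms import LlamaCpp

# Hypothetical local model path; point this at a real ggml model file.
llm = LlamaCpp(
    model_path="./models/ggml-model-q4_0.bin",
    n_ctx=512,        # token context window
    n_parts=-1,       # -1: number of parts determined automatically
    f16_kv=True,      # half-precision KV cache, now the default; pass False to opt out
    n_threads=None,   # None: thread count determined automatically
    n_batch=8,        # tokens processed per batch
)

print(llm("Q: Name the planets in the solar system. A:"))
```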